1 /* nl -- number lines of files
2    Copyright (C) 1989-2023 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Written by Scott Bartram (nancy!scott@uunet.uu.net)
18    Revised by David MacKenzie (djm@gnu.ai.mit.edu) */
19 
20 #include <config.h>
21 
22 #include <stdckdint.h>
23 #include <stdio.h>
24 #include <sys/types.h>
25 #include <getopt.h>
26 
27 #include "system.h"
28 
29 #include <regex.h>
30 
31 #include "fadvise.h"
32 #include "linebuffer.h"
33 #include "quote.h"
34 #include "xdectoint.h"
35 
/* Canonical name of this utility, independent of any prefix (such as
   'g') it may have been installed with.  */
#define PROGRAM_NAME "nl"

/* The people credited as authors of this program.  */
#define AUTHORS                      \
  proper_name ("Scott Bartram"),     \
  proper_name ("David MacKenzie")
42 
/* printf templates for a numbered line prefix.  Each one consumes, in
   order: the field width (int), the line number (intmax_t) and the
   separator string.  */

/* Number flush right, padded with blanks.  */
static const char FORMAT_RIGHT_NOLZ[] = "%*jd%s";

/* Number flush right, padded with zeroes.  */
static const char FORMAT_RIGHT_LZ[] = "%0*jd%s";

/* Number flush left, padded with blanks.  */
static const char FORMAT_LEFT[] = "%-*jd%s";

/* The section delimiter pair used when -d is not given.  This array is
   deliberately writable: a one- or two-character -d argument is stored
   into it.  */
static char DEFAULT_SECTION_DELIMITERS[] = "\\:";
57 
/* Classification of one input line: a delimiter line that starts a new
   section, or ordinary text to be copied to the output.  */
enum section
{
  Header,	/* Delimiter line introducing a header section.  */
  Body,		/* Delimiter line introducing a body section.  */
  Footer,	/* Delimiter line introducing a footer section.  */
  Text		/* Any other line.  */
};
64 
65 /* Format of body lines (-b).  */
66 static char const *body_type = "t";
67 
68 /* Format of header lines (-h).  */
69 static char const *header_type = "n";
70 
71 /* Format of footer lines (-f).  */
72 static char const *footer_type = "n";
73 
74 /* Format currently being used (body, header, or footer).  */
75 static char const *current_type;
76 
77 /* Regex for body lines to number (-bp).  */
78 static struct re_pattern_buffer body_regex;
79 
80 /* Regex for header lines to number (-hp).  */
81 static struct re_pattern_buffer header_regex;
82 
83 /* Regex for footer lines to number (-fp).  */
84 static struct re_pattern_buffer footer_regex;
85 
86 /* Fastmaps for the above.  */
87 static char body_fastmap[UCHAR_MAX + 1];
88 static char header_fastmap[UCHAR_MAX + 1];
89 static char footer_fastmap[UCHAR_MAX + 1];
90 
91 /* Pointer to current regex, if any.  */
92 static struct re_pattern_buffer *current_regex = nullptr;
93 
94 /* Separator string to print after line number (-s).  */
95 static char const *separator_str = "\t";
96 
97 /* Input section delimiter string (-d).  */
98 static char *section_del = DEFAULT_SECTION_DELIMITERS;
99 
100 /* Header delimiter string.  */
101 static char *header_del = nullptr;
102 
103 /* Header section delimiter length.  */
104 static size_t header_del_len;
105 
106 /* Body delimiter string.  */
107 static char *body_del = nullptr;
108 
109 /* Body section delimiter length.  */
110 static size_t body_del_len;
111 
112 /* Footer delimiter string.  */
113 static char *footer_del = nullptr;
114 
115 /* Footer section delimiter length.  */
116 static size_t footer_del_len;
117 
118 /* Input buffer.  */
119 static struct linebuffer line_buf;
120 
121 /* printf format string for unnumbered lines.  */
122 static char *print_no_line_fmt = nullptr;
123 
/* Number given to the first numbered line, and again whenever the
   numbering is reset (-v).  */
static intmax_t starting_line_number = 1;

/* Amount added to the line number after each numbered line (-i).  */
static intmax_t page_incr = 1;

/* Whether numbering restarts when a new section begins; -p clears
   this.  */
static bool reset_numbers = true;

/* How many consecutive blank lines count as a single line when all
   lines are being numbered (-l).  */
static intmax_t blank_join = 1;

/* Field width of the printed line number (-w).  */
static int lineno_width = 6;
138 
139 /* Line number format (-n).  */
140 static char const *lineno_format = FORMAT_RIGHT_NOLZ;
141 
142 /* Current print line number.  */
143 static intmax_t line_no;
144 
145 /* Whether the current line number has incremented past limits.  */
146 static bool line_no_overflow;
147 
148 /* True if we have ever read standard input.  */
149 static bool have_read_stdin;
150 
151 static struct option const longopts[] =
152 {
153   {"header-numbering", required_argument, nullptr, 'h'},
154   {"body-numbering", required_argument, nullptr, 'b'},
155   {"footer-numbering", required_argument, nullptr, 'f'},
156   {"starting-line-number", required_argument, nullptr, 'v'},
157   {"line-increment", required_argument, nullptr, 'i'},
158   {"no-renumber", no_argument, nullptr, 'p'},
159   {"join-blank-lines", required_argument, nullptr, 'l'},
160   {"number-separator", required_argument, nullptr, 's'},
161   {"number-width", required_argument, nullptr, 'w'},
162   {"number-format", required_argument, nullptr, 'n'},
163   {"section-delimiter", required_argument, nullptr, 'd'},
164   {GETOPT_HELP_OPTION_DECL},
165   {GETOPT_VERSION_OPTION_DECL},
166   {nullptr, 0, nullptr, 0}
167 };
168 
169 /* Print a usage message and quit. */
170 
171 void
usage(int status)172 usage (int status)
173 {
174   if (status != EXIT_SUCCESS)
175     emit_try_help ();
176   else
177     {
178       printf (_("\
179 Usage: %s [OPTION]... [FILE]...\n\
180 "),
181               program_name);
182       fputs (_("\
183 Write each FILE to standard output, with line numbers added.\n\
184 "), stdout);
185 
186       emit_stdin_note ();
187       emit_mandatory_arg_note ();
188 
189       fputs (_("\
190   -b, --body-numbering=STYLE      use STYLE for numbering body lines\n\
191   -d, --section-delimiter=CC      use CC for logical page delimiters\n\
192   -f, --footer-numbering=STYLE    use STYLE for numbering footer lines\n\
193 "), stdout);
194       fputs (_("\
195   -h, --header-numbering=STYLE    use STYLE for numbering header lines\n\
196   -i, --line-increment=NUMBER     line number increment at each line\n\
197   -l, --join-blank-lines=NUMBER   group of NUMBER empty lines counted as one\n\
198   -n, --number-format=FORMAT      insert line numbers according to FORMAT\n\
199   -p, --no-renumber               do not reset line numbers for each section\n\
200   -s, --number-separator=STRING   add STRING after (possible) line number\n\
201 "), stdout);
202       fputs (_("\
203   -v, --starting-line-number=NUMBER  first line number for each section\n\
204   -w, --number-width=NUMBER       use NUMBER columns for line numbers\n\
205 "), stdout);
206       fputs (HELP_OPTION_DESCRIPTION, stdout);
207       fputs (VERSION_OPTION_DESCRIPTION, stdout);
208       fputs (_("\
209 \n\
210 Default options are: -bt -d'\\:' -fn -hn -i1 -l1 -n'rn' -s<TAB> -v1 -w6\n\
211 \n\
212 CC are two delimiter characters used to construct logical page delimiters;\n\
213 a missing second character implies ':'.  As a GNU extension one can specify\n\
214 more than two characters, and also specifying the empty string (-d '')\n\
215 disables section matching.\n\
216 "), stdout);
217       fputs (_("\
218 \n\
219 STYLE is one of:\n\
220 \n\
221   a      number all lines\n\
222   t      number only nonempty lines\n\
223   n      number no lines\n\
224   pBRE   number only lines that contain a match for the basic regular\n\
225          expression, BRE\n\
226 "), stdout);
227       fputs (_("\
228 \n\
229 FORMAT is one of:\n\
230 \n\
231   ln     left justified, no leading zeros\n\
232   rn     right justified, no leading zeros\n\
233   rz     right justified, leading zeros\n\
234 \n\
235 "), stdout);
236       emit_ancillary_info (PROGRAM_NAME);
237     }
238   exit (status);
239 }
240 
241 /* Set the command line flag TYPEP and possibly the regex pointer REGEXP,
242    according to 'optarg'.  */
243 
244 static bool
build_type_arg(char const ** typep,struct re_pattern_buffer * regexp,char * fastmap)245 build_type_arg (char const **typep,
246                 struct re_pattern_buffer *regexp, char *fastmap)
247 {
248   char const *errmsg;
249   bool rval = true;
250 
251   switch (*optarg)
252     {
253     case 'a':
254     case 't':
255     case 'n':
256       *typep = optarg;
257       break;
258     case 'p':
259       *typep = optarg++;
260       regexp->buffer = nullptr;
261       regexp->allocated = 0;
262       regexp->fastmap = fastmap;
263       regexp->translate = nullptr;
264       re_syntax_options =
265         RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
266       errmsg = re_compile_pattern (optarg, strlen (optarg), regexp);
267       if (errmsg)
268         error (EXIT_FAILURE, 0, "%s", (errmsg));
269       break;
270     default:
271       rval = false;
272       break;
273     }
274   return rval;
275 }
276 
/* Write the current line number followed by the separator, then
   advance the number by the increment.  If an earlier advance
   overflowed, report a fatal error instead of printing.  */

static void
print_lineno (void)
{
  if (line_no_overflow)
    error (EXIT_FAILURE, 0, _("line number overflow"));

  printf (lineno_format, lineno_width, line_no, separator_str);

  /* The flag is known to be clear here, so it can simply take the
     result of the checked addition.  An overflow is only diagnosed if
     another number is actually needed.  */
  line_no_overflow = ckd_add (&line_no, line_no, page_incr);
}
290 
291 static void
reset_lineno(void)292 reset_lineno (void)
293 {
294   if (reset_numbers)
295     {
296       line_no = starting_line_number;
297       line_no_overflow = false;
298     }
299 }
300 
301 /* Switch to a header section. */
302 
303 static void
proc_header(void)304 proc_header (void)
305 {
306   current_type = header_type;
307   current_regex = &header_regex;
308   reset_lineno ();
309   putchar ('\n');
310 }
311 
312 /* Switch to a body section. */
313 
314 static void
proc_body(void)315 proc_body (void)
316 {
317   current_type = body_type;
318   current_regex = &body_regex;
319   reset_lineno ();
320   putchar ('\n');
321 }
322 
323 /* Switch to a footer section. */
324 
325 static void
proc_footer(void)326 proc_footer (void)
327 {
328   current_type = footer_type;
329   current_regex = &footer_regex;
330   reset_lineno ();
331   putchar ('\n');
332 }
333 
334 /* Process a regular text line in 'line_buf'. */
335 
336 static void
proc_text(void)337 proc_text (void)
338 {
339   static intmax_t blank_lines = 0;	/* Consecutive blank lines so far. */
340 
341   switch (*current_type)
342     {
343     case 'a':
344       if (blank_join > 1)
345         {
346           if (1 < line_buf.length || ++blank_lines == blank_join)
347             {
348               print_lineno ();
349               blank_lines = 0;
350             }
351           else
352             fputs (print_no_line_fmt, stdout);
353         }
354       else
355         print_lineno ();
356       break;
357     case 't':
358       if (1 < line_buf.length)
359         print_lineno ();
360       else
361         fputs (print_no_line_fmt, stdout);
362       break;
363     case 'n':
364       fputs (print_no_line_fmt, stdout);
365       break;
366     case 'p':
367       switch (re_search (current_regex, line_buf.buffer, line_buf.length - 1,
368                          0, line_buf.length - 1, nullptr))
369         {
370         case -2:
371           error (EXIT_FAILURE, errno, _("error in regular expression search"));
372 
373         case -1:
374           fputs (print_no_line_fmt, stdout);
375           break;
376 
377         default:
378           print_lineno ();
379           break;
380         }
381     }
382   fwrite (line_buf.buffer, sizeof (char), line_buf.length, stdout);
383 }
384 
385 /* Return the type of line in 'line_buf'. */
386 
387 static enum section
check_section(void)388 check_section (void)
389 {
390   size_t len = line_buf.length - 1;
391 
392   if (len < 2 || footer_del_len < 2
393       || memcmp (line_buf.buffer, section_del, 2))
394     return Text;
395   if (len == header_del_len
396       && !memcmp (line_buf.buffer, header_del, header_del_len))
397     return Header;
398   if (len == body_del_len
399       && !memcmp (line_buf.buffer, body_del, body_del_len))
400     return Body;
401   if (len == footer_del_len
402       && !memcmp (line_buf.buffer, footer_del, footer_del_len))
403     return Footer;
404   return Text;
405 }
406 
407 /* Read and process the file pointed to by FP. */
408 
409 static void
process_file(FILE * fp)410 process_file (FILE *fp)
411 {
412   while (readlinebuffer (&line_buf, fp))
413     {
414       switch (check_section ())
415         {
416         case Header:
417           proc_header ();
418           break;
419         case Body:
420           proc_body ();
421           break;
422         case Footer:
423           proc_footer ();
424           break;
425         case Text:
426           proc_text ();
427           break;
428         }
429     }
430 }
431 
432 /* Process file FILE to standard output.
433    Return true if successful.  */
434 
435 static bool
nl_file(char const * file)436 nl_file (char const *file)
437 {
438   FILE *stream;
439 
440   if (STREQ (file, "-"))
441     {
442       have_read_stdin = true;
443       stream = stdin;
444       assume (stream);  /* Pacify GCC bug#109613.  */
445     }
446   else
447     {
448       stream = fopen (file, "r");
449       if (stream == nullptr)
450         {
451           error (0, errno, "%s", quotef (file));
452           return false;
453         }
454     }
455 
456   fadvise (stream, FADVISE_SEQUENTIAL);
457 
458   process_file (stream);
459 
460   int err = errno;
461   if (!ferror (stream))
462     err = 0;
463   if (STREQ (file, "-"))
464     clearerr (stream);		/* Also clear EOF. */
465   else if (fclose (stream) != 0 && !err)
466     err = errno;
467   if (err)
468     {
469       error (0, err, "%s", quotef (file));
470       return false;
471     }
472   return true;
473 }
474 
475 int
main(int argc,char ** argv)476 main (int argc, char **argv)
477 {
478   int c;
479   size_t len;
480   bool ok = true;
481 
482   initialize_main (&argc, &argv);
483   set_program_name (argv[0]);
484   setlocale (LC_ALL, "");
485   bindtextdomain (PACKAGE, LOCALEDIR);
486   textdomain (PACKAGE);
487 
488   atexit (close_stdout);
489 
490   have_read_stdin = false;
491 
492   while ((c = getopt_long (argc, argv, "h:b:f:v:i:pl:s:w:n:d:", longopts,
493                            nullptr))
494          != -1)
495     {
496       switch (c)
497         {
498         case 'h':
499           if (! build_type_arg (&header_type, &header_regex, header_fastmap))
500             {
501               error (0, 0, _("invalid header numbering style: %s"),
502                      quote (optarg));
503               ok = false;
504             }
505           break;
506         case 'b':
507           if (! build_type_arg (&body_type, &body_regex, body_fastmap))
508             {
509               error (0, 0, _("invalid body numbering style: %s"),
510                      quote (optarg));
511               ok = false;
512             }
513           break;
514         case 'f':
515           if (! build_type_arg (&footer_type, &footer_regex, footer_fastmap))
516             {
517               error (0, 0, _("invalid footer numbering style: %s"),
518                      quote (optarg));
519               ok = false;
520             }
521           break;
522         case 'v':
523           starting_line_number = xdectoimax (optarg, INTMAX_MIN, INTMAX_MAX, "",
524                                              _("invalid starting line number"),
525                                              0);
526           break;
527         case 'i':
528           page_incr = xdectoimax (optarg, INTMAX_MIN, INTMAX_MAX, "",
529                                   _("invalid line number increment"), 0);
530           break;
531         case 'p':
532           reset_numbers = false;
533           break;
534         case 'l':
535           blank_join = xdectoimax (optarg, 1, INTMAX_MAX, "",
536                                    _("invalid line number of blank lines"), 0);
537           break;
538         case 's':
539           separator_str = optarg;
540           break;
541         case 'w':
542           lineno_width = xdectoimax (optarg, 1, INT_MAX, "",
543                                      _("invalid line number field width"), 0);
544           break;
545         case 'n':
546           if (STREQ (optarg, "ln"))
547             lineno_format = FORMAT_LEFT;
548           else if (STREQ (optarg, "rn"))
549             lineno_format = FORMAT_RIGHT_NOLZ;
550           else if (STREQ (optarg, "rz"))
551             lineno_format = FORMAT_RIGHT_LZ;
552           else
553             {
554               error (0, 0, _("invalid line numbering format: %s"),
555                      quote (optarg));
556               ok = false;
557             }
558           break;
559         case 'd':
560           len = strlen (optarg);
561           if (len == 1 || len == 2)  /* POSIX.  */
562             {
563               char *p = section_del;
564               while (*optarg)
565                 *p++ = *optarg++;
566             }
567           else
568             section_del = optarg;  /* GNU extension.  */
569           break;
570         case_GETOPT_HELP_CHAR;
571         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
572         default:
573           ok = false;
574           break;
575         }
576     }
577 
578   if (!ok)
579     usage (EXIT_FAILURE);
580 
581   /* Initialize the section delimiters.  */
582   len = strlen (section_del);
583 
584   header_del_len = len * 3;
585   header_del = xmalloc (header_del_len + 1);
586   stpcpy (stpcpy (stpcpy (header_del, section_del), section_del), section_del);
587 
588   body_del_len = len * 2;
589   body_del = header_del + len;
590 
591   footer_del_len = len;
592   footer_del = body_del + len;
593 
594   /* Initialize the input buffer.  */
595   initbuffer (&line_buf);
596 
597   /* Initialize the printf format for unnumbered lines. */
598   len = strlen (separator_str);
599   print_no_line_fmt = xmalloc (lineno_width + len + 1);
600   memset (print_no_line_fmt, ' ', lineno_width + len);
601   print_no_line_fmt[lineno_width + len] = '\0';
602 
603   line_no = starting_line_number;
604   current_type = body_type;
605   current_regex = &body_regex;
606 
607   /* Main processing. */
608 
609   if (optind == argc)
610     ok = nl_file ("-");
611   else
612     for (; optind < argc; optind++)
613       ok &= nl_file (argv[optind]);
614 
615   if (have_read_stdin && fclose (stdin) == EOF)
616     error (EXIT_FAILURE, errno, "-");
617 
618   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
619 }
620