1 /* set-fields.c -- common functions for parsing field list
2    Copyright (C) 2015-2023 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Extracted from cut.c by Assaf Gordon */
18 
19 #include <config.h>
20 
21 #include "system.h"
22 #include "quote.h"
23 #include "set-fields.h"
24 
25 /* Array of `struct field_range_pair' holding all the finite ranges. */
26 struct field_range_pair *frp;
27 
28 /* Number of finite ranges specified by the user. */
29 size_t n_frp;
30 
31 /* Number of `struct field_range_pair's allocated. */
32 static size_t n_frp_allocated;
33 
34 #define FATAL_ERROR(Message)                                            \
35   do                                                                    \
36     {                                                                   \
37       error (0, 0, (Message));                                          \
38       usage (EXIT_FAILURE);                                             \
39     }                                                                   \
40   while (0)
41 
42 /* Append LOW, HIGH to the list RP of range pairs, allocating additional
43    space if necessary.  Update global variable N_FRP.  When allocating,
44    update global variable N_FRP_ALLOCATED.  */
45 static void
add_range_pair(uintmax_t lo,uintmax_t hi)46 add_range_pair (uintmax_t lo, uintmax_t hi)
47 {
48   if (n_frp == n_frp_allocated)
49     frp = X2NREALLOC (frp, &n_frp_allocated);
50   frp[n_frp].lo = lo;
51   frp[n_frp].hi = hi;
52   ++n_frp;
53 }
54 
55 
56 /* Comparison function for qsort to order the list of
57    struct field_range_pairs.  */
58 static int
compare_ranges(const void * a,const void * b)59 compare_ranges (const void *a, const void *b)
60 {
61   struct field_range_pair const *ap = a, *bp = b;
62   return (ap->lo > bp->lo) - (ap->lo < bp->lo);
63 }
64 
65 /* Reallocate Range Pair entries, with corresponding
66    entries outside the range of each specified entry.  */
67 
68 static void
complement_rp(void)69 complement_rp (void)
70 {
71   struct field_range_pair *c = frp;
72   size_t n = n_frp;
73 
74   frp = nullptr;
75   n_frp = 0;
76   n_frp_allocated = 0;
77 
78   if (c[0].lo > 1)
79     add_range_pair (1, c[0].lo - 1);
80 
81   for (size_t i = 1; i < n; ++i)
82     {
83       if (c[i - 1].hi + 1 == c[i].lo)
84         continue;
85 
86       add_range_pair (c[i - 1].hi + 1, c[i].lo - 1);
87     }
88 
89   if (c[n - 1].hi < UINTMAX_MAX)
90     add_range_pair (c[n - 1].hi + 1, UINTMAX_MAX);
91 
92   free (c);
93 }
94 
95 /* Given the list of field or byte range specifications FIELDSTR,
96    allocate and initialize the FRP array. FIELDSTR should
97    be composed of one or more numbers or ranges of numbers, separated
98    by blanks or commas.  Incomplete ranges may be given: '-m' means '1-m';
99    'n-' means 'n' through end of line.
100    n=0 and n>=UINTMAX_MAX values will trigger an error.
101 
102    if SETFLD_ALLOW_DASH option is used, a single '-' means all fields
103    (otherwise a single dash triggers an error).
104 
105    if SETFLD_COMPLEMENT option is used, the specified field list
106    is complemented (e.g. '1-3' will result in fields '4-').
107 
108    if SETFLD_ERRMSG_USE_POS option is used, error messages
109    will say 'position' (or 'byte/character positions')
110    instead of fields (used with cut -b/-c).
111 
112    The function terminates on failure.
113 
114    Upon return, the FRP array is initialized to contain
115    a non-overlapping, increasing list of field ranges.
116 
117    N_FRP holds the number of field ranges in the FRP array.
118 
119    The first field is stored as 1 (zero is not used).
120    An open-ended range (i.e., until the last field of the input line)
121    is indicated with hi = UINTMAX_MAX.
122 
123    A sentinel of UINTMAX_MAX/UINTMAX_MAX is always added as the last
124    field range pair.
125 
126    Examples:
127    given '1-2,4', frp = [ { .lo = 1,           .hi = 2 },
128                           { .lo = 4,           .hi = 4 },
129                           { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ];
130 
131    given '3-',    frp = [ { .lo = 3,           .hi = UINTMAX_MAX },
132                           { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ];
133 */
134 void
set_fields(char const * fieldstr,unsigned int options)135 set_fields (char const *fieldstr, unsigned int options)
136 {
137   uintmax_t initial = 1;	/* Value of first number in a range.  */
138   uintmax_t value = 0;		/* If nonzero, a number being accumulated.  */
139   bool lhs_specified = false;
140   bool rhs_specified = false;
141   bool dash_found = false;	/* True if a '-' is found in this field.  */
142 
143   bool in_digits = false;
144 
145   /* Collect and store in RP the range end points. */
146 
147   /* Special case: '--field=-' means all fields, emulate '--field=1-' . */
148   if ((options & SETFLD_ALLOW_DASH) && STREQ (fieldstr,"-"))
149     {
150       value = 1;
151       lhs_specified = true;
152       dash_found = true;
153       fieldstr++;
154     }
155 
156   while (true)
157     {
158       if (*fieldstr == '-')
159         {
160           in_digits = false;
161           /* Starting a range. */
162           if (dash_found)
163             FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS)
164                          ? _("invalid byte or character range")
165                          : _("invalid field range"));
166 
167           dash_found = true;
168           fieldstr++;
169 
170           if (lhs_specified && !value)
171             FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS)
172                          ? _("byte/character positions are numbered from 1")
173                          : _("fields are numbered from 1"));
174 
175           initial = (lhs_specified ? value : 1);
176           value = 0;
177         }
178       else if (*fieldstr == ','
179                || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
180         {
181           in_digits = false;
182           /* Ending the string, or this field/byte sublist. */
183           if (dash_found)
184             {
185               dash_found = false;
186 
187               if (!lhs_specified && !rhs_specified)
188                 {
189                   /* if a lone dash is allowed, emulate '1-' for all fields */
190                   if (options & SETFLD_ALLOW_DASH)
191                     initial = 1;
192                   else
193                     FATAL_ERROR (_("invalid range with no endpoint: -"));
194                 }
195 
196               /* A range.  Possibilities: -n, m-n, n-.
197                  In any case, 'initial' contains the start of the range. */
198               if (!rhs_specified)
199                 {
200                   /* 'n-'.  From 'initial' to end of line. */
201                   add_range_pair (initial, UINTMAX_MAX);
202                 }
203               else
204                 {
205                   /* 'm-n' or '-n' (1-n). */
206                   if (value < initial)
207                     FATAL_ERROR (_("invalid decreasing range"));
208 
209                   add_range_pair (initial, value);
210                 }
211               value = 0;
212             }
213           else
214             {
215               /* A simple field number, not a range. */
216               if (value == 0)
217                 FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS)
218                              ? _("byte/character positions are numbered from 1")
219                              : _("fields are numbered from 1"));
220 
221               add_range_pair (value, value);
222               value = 0;
223             }
224 
225           if (*fieldstr == '\0')
226             break;
227 
228           fieldstr++;
229           lhs_specified = false;
230           rhs_specified = false;
231         }
232       else if (ISDIGIT (*fieldstr))
233         {
234           /* Record beginning of digit string, in case we have to
235              complain about it.  */
236           static char const *num_start;
237           if (!in_digits || !num_start)
238             num_start = fieldstr;
239           in_digits = true;
240 
241           if (dash_found)
242             rhs_specified = 1;
243           else
244             lhs_specified = 1;
245 
246           /* Detect overflow.  */
247           if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', uintmax_t)
248               || value == UINTMAX_MAX)
249             {
250               /* In case the user specified -c$(echo 2^64|bc),22,
251                  complain only about the first number.  */
252               /* Determine the length of the offending number.  */
253               size_t len = strspn (num_start, "0123456789");
254               char *bad_num = ximemdup0 (num_start, len);
255               error (0, 0, (options & SETFLD_ERRMSG_USE_POS)
256                            ?_("byte/character offset %s is too large")
257                            :_("field number %s is too large"),
258                            quote (bad_num));
259               free (bad_num);
260               usage (EXIT_FAILURE);
261             }
262 
263           fieldstr++;
264         }
265       else
266         {
267           error (0, 0, (options & SETFLD_ERRMSG_USE_POS)
268                        ?_("invalid byte/character position %s")
269                        :_("invalid field value %s"),
270                        quote (fieldstr));
271           usage (EXIT_FAILURE);
272         }
273     }
274 
275   if (!n_frp)
276     FATAL_ERROR ((options&SETFLD_ERRMSG_USE_POS)
277                  ? _("missing list of byte/character positions")
278                  : _("missing list of fields"));
279 
280   qsort (frp, n_frp, sizeof (frp[0]), compare_ranges);
281 
282   /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */
283   for (size_t i = 0; i < n_frp; ++i)
284     {
285       for (size_t j = i + 1; j < n_frp; ++j)
286         {
287           if (frp[j].lo <= frp[i].hi)
288             {
289               frp[i].hi = MAX (frp[j].hi, frp[i].hi);
290               memmove (frp + j, frp + j + 1, (n_frp - j - 1) * sizeof *frp);
291               n_frp--;
292               j--;
293             }
294           else
295             break;
296         }
297     }
298 
299   if (options & SETFLD_COMPLEMENT)
300     complement_rp ();
301 
302   /* After merging, reallocate RP so we release memory to the system.
303      Also add a sentinel at the end of RP, to avoid out of bounds access
304      and for performance reasons.  */
305   ++n_frp;
306   frp = xrealloc (frp, n_frp * sizeof (struct field_range_pair));
307   frp[n_frp - 1].lo = frp[n_frp - 1].hi = UINTMAX_MAX;
308 }
309