1 /* set-fields.c -- common functions for parsing field list
2 Copyright (C) 2015-2023 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* Extracted from cut.c by Assaf Gordon */
18
19 #include <config.h>
20
21 #include "system.h"
22 #include "quote.h"
23 #include "set-fields.h"
24
25 /* Array of `struct field_range_pair' holding all the finite ranges. */
26 struct field_range_pair *frp;
27
28 /* Number of finite ranges specified by the user. */
29 size_t n_frp;
30
31 /* Number of `struct field_range_pair's allocated. */
32 static size_t n_frp_allocated;
33
34 #define FATAL_ERROR(Message) \
35 do \
36 { \
37 error (0, 0, (Message)); \
38 usage (EXIT_FAILURE); \
39 } \
40 while (0)
41
42 /* Append LOW, HIGH to the list RP of range pairs, allocating additional
43 space if necessary. Update global variable N_FRP. When allocating,
44 update global variable N_FRP_ALLOCATED. */
45 static void
add_range_pair(uintmax_t lo,uintmax_t hi)46 add_range_pair (uintmax_t lo, uintmax_t hi)
47 {
48 if (n_frp == n_frp_allocated)
49 frp = X2NREALLOC (frp, &n_frp_allocated);
50 frp[n_frp].lo = lo;
51 frp[n_frp].hi = hi;
52 ++n_frp;
53 }
54
55
56 /* Comparison function for qsort to order the list of
57 struct field_range_pairs. */
58 static int
compare_ranges(const void * a,const void * b)59 compare_ranges (const void *a, const void *b)
60 {
61 struct field_range_pair const *ap = a, *bp = b;
62 return (ap->lo > bp->lo) - (ap->lo < bp->lo);
63 }
64
65 /* Reallocate Range Pair entries, with corresponding
66 entries outside the range of each specified entry. */
67
68 static void
complement_rp(void)69 complement_rp (void)
70 {
71 struct field_range_pair *c = frp;
72 size_t n = n_frp;
73
74 frp = nullptr;
75 n_frp = 0;
76 n_frp_allocated = 0;
77
78 if (c[0].lo > 1)
79 add_range_pair (1, c[0].lo - 1);
80
81 for (size_t i = 1; i < n; ++i)
82 {
83 if (c[i - 1].hi + 1 == c[i].lo)
84 continue;
85
86 add_range_pair (c[i - 1].hi + 1, c[i].lo - 1);
87 }
88
89 if (c[n - 1].hi < UINTMAX_MAX)
90 add_range_pair (c[n - 1].hi + 1, UINTMAX_MAX);
91
92 free (c);
93 }
94
95 /* Given the list of field or byte range specifications FIELDSTR,
96 allocate and initialize the FRP array. FIELDSTR should
97 be composed of one or more numbers or ranges of numbers, separated
98 by blanks or commas. Incomplete ranges may be given: '-m' means '1-m';
99 'n-' means 'n' through end of line.
100 n=0 and n>=UINTMAX_MAX values will trigger an error.
101
102 if SETFLD_ALLOW_DASH option is used, a single '-' means all fields
103 (otherwise a single dash triggers an error).
104
105 if SETFLD_COMPLEMENT option is used, the specified field list
106 is complemented (e.g. '1-3' will result in fields '4-').
107
108 if SETFLD_ERRMSG_USE_POS option is used, error messages
109 will say 'position' (or 'byte/character positions')
110 instead of fields (used with cut -b/-c).
111
112 The function terminates on failure.
113
114 Upon return, the FRP array is initialized to contain
115 a non-overlapping, increasing list of field ranges.
116
117 N_FRP holds the number of field ranges in the FRP array.
118
119 The first field is stored as 1 (zero is not used).
120 An open-ended range (i.e., until the last field of the input line)
121 is indicated with hi = UINTMAX_MAX.
122
123 A sentinel of UINTMAX_MAX/UINTMAX_MAX is always added as the last
124 field range pair.
125
126 Examples:
127 given '1-2,4', frp = [ { .lo = 1, .hi = 2 },
128 { .lo = 4, .hi = 4 },
129 { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ];
130
131 given '3-', frp = [ { .lo = 3, .hi = UINTMAX_MAX },
132 { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ];
133 */
134 void
set_fields(char const * fieldstr,unsigned int options)135 set_fields (char const *fieldstr, unsigned int options)
136 {
137 uintmax_t initial = 1; /* Value of first number in a range. */
138 uintmax_t value = 0; /* If nonzero, a number being accumulated. */
139 bool lhs_specified = false;
140 bool rhs_specified = false;
141 bool dash_found = false; /* True if a '-' is found in this field. */
142
143 bool in_digits = false;
144
145 /* Collect and store in RP the range end points. */
146
147 /* Special case: '--field=-' means all fields, emulate '--field=1-' . */
148 if ((options & SETFLD_ALLOW_DASH) && STREQ (fieldstr,"-"))
149 {
150 value = 1;
151 lhs_specified = true;
152 dash_found = true;
153 fieldstr++;
154 }
155
156 while (true)
157 {
158 if (*fieldstr == '-')
159 {
160 in_digits = false;
161 /* Starting a range. */
162 if (dash_found)
163 FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS)
164 ? _("invalid byte or character range")
165 : _("invalid field range"));
166
167 dash_found = true;
168 fieldstr++;
169
170 if (lhs_specified && !value)
171 FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS)
172 ? _("byte/character positions are numbered from 1")
173 : _("fields are numbered from 1"));
174
175 initial = (lhs_specified ? value : 1);
176 value = 0;
177 }
178 else if (*fieldstr == ','
179 || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
180 {
181 in_digits = false;
182 /* Ending the string, or this field/byte sublist. */
183 if (dash_found)
184 {
185 dash_found = false;
186
187 if (!lhs_specified && !rhs_specified)
188 {
189 /* if a lone dash is allowed, emulate '1-' for all fields */
190 if (options & SETFLD_ALLOW_DASH)
191 initial = 1;
192 else
193 FATAL_ERROR (_("invalid range with no endpoint: -"));
194 }
195
196 /* A range. Possibilities: -n, m-n, n-.
197 In any case, 'initial' contains the start of the range. */
198 if (!rhs_specified)
199 {
200 /* 'n-'. From 'initial' to end of line. */
201 add_range_pair (initial, UINTMAX_MAX);
202 }
203 else
204 {
205 /* 'm-n' or '-n' (1-n). */
206 if (value < initial)
207 FATAL_ERROR (_("invalid decreasing range"));
208
209 add_range_pair (initial, value);
210 }
211 value = 0;
212 }
213 else
214 {
215 /* A simple field number, not a range. */
216 if (value == 0)
217 FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS)
218 ? _("byte/character positions are numbered from 1")
219 : _("fields are numbered from 1"));
220
221 add_range_pair (value, value);
222 value = 0;
223 }
224
225 if (*fieldstr == '\0')
226 break;
227
228 fieldstr++;
229 lhs_specified = false;
230 rhs_specified = false;
231 }
232 else if (ISDIGIT (*fieldstr))
233 {
234 /* Record beginning of digit string, in case we have to
235 complain about it. */
236 static char const *num_start;
237 if (!in_digits || !num_start)
238 num_start = fieldstr;
239 in_digits = true;
240
241 if (dash_found)
242 rhs_specified = 1;
243 else
244 lhs_specified = 1;
245
246 /* Detect overflow. */
247 if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', uintmax_t)
248 || value == UINTMAX_MAX)
249 {
250 /* In case the user specified -c$(echo 2^64|bc),22,
251 complain only about the first number. */
252 /* Determine the length of the offending number. */
253 size_t len = strspn (num_start, "0123456789");
254 char *bad_num = ximemdup0 (num_start, len);
255 error (0, 0, (options & SETFLD_ERRMSG_USE_POS)
256 ?_("byte/character offset %s is too large")
257 :_("field number %s is too large"),
258 quote (bad_num));
259 free (bad_num);
260 usage (EXIT_FAILURE);
261 }
262
263 fieldstr++;
264 }
265 else
266 {
267 error (0, 0, (options & SETFLD_ERRMSG_USE_POS)
268 ?_("invalid byte/character position %s")
269 :_("invalid field value %s"),
270 quote (fieldstr));
271 usage (EXIT_FAILURE);
272 }
273 }
274
275 if (!n_frp)
276 FATAL_ERROR ((options&SETFLD_ERRMSG_USE_POS)
277 ? _("missing list of byte/character positions")
278 : _("missing list of fields"));
279
280 qsort (frp, n_frp, sizeof (frp[0]), compare_ranges);
281
282 /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */
283 for (size_t i = 0; i < n_frp; ++i)
284 {
285 for (size_t j = i + 1; j < n_frp; ++j)
286 {
287 if (frp[j].lo <= frp[i].hi)
288 {
289 frp[i].hi = MAX (frp[j].hi, frp[i].hi);
290 memmove (frp + j, frp + j + 1, (n_frp - j - 1) * sizeof *frp);
291 n_frp--;
292 j--;
293 }
294 else
295 break;
296 }
297 }
298
299 if (options & SETFLD_COMPLEMENT)
300 complement_rp ();
301
302 /* After merging, reallocate RP so we release memory to the system.
303 Also add a sentinel at the end of RP, to avoid out of bounds access
304 and for performance reasons. */
305 ++n_frp;
306 frp = xrealloc (frp, n_frp * sizeof (struct field_range_pair));
307 frp[n_frp - 1].lo = frp[n_frp - 1].hi = UINTMAX_MAX;
308 }
309