1#!/usr/bin/perl
2# Test uniq.
3
4# Copyright (C) 2008-2023 Free Software Foundation, Inc.
5
6# This program is free software: you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation, either version 3 of the License, or
9# (at your option) any later version.
10
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15
16# You should have received a copy of the GNU General Public License
17# along with this program.  If not, see <https://www.gnu.org/licenses/>.
18
19use strict;
20
21my $limits = getlimits ();
22
23my $prog = 'uniq';
24my $try = "Try '$prog --help' for more information.\n";
25
26# Turn off localization of executable's output.
27@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
28
29# When possible, create a "-z"-testing variant of each test.
30sub add_z_variants($)
31{
32  my ($tests) = @_;
33  my @new;
34 TEST:
35  foreach my $t (@$tests)
36    {
37      push @new, $t;
38
39      # skip the obsolete-syntax tests
40      $t->[0] =~ /^obs-plus/
41        and next;
42
43      my @args;
44      my @list_of_hash;
45
46      foreach my $e (@$t)
47        {
48          !ref $e
49            and push (@args, $e), next;
50
51          ref $e && ref $e eq 'HASH'
52            or (warn "$0: $t->[0]: unexpected entry type\n"), next;
53          my $tmp = $e;
54          foreach my $k (qw(IN OUT))
55            {
56              my $val = $e->{$k};
57              # skip any test whose input or output already contains a NUL byte
58              if (defined $val)
59                {
60                  $val =~ /\0/
61                    and next TEST;
62
63                  # Convert each NL in input or output to \0.
64                  $val =~ s/\n/\0/g;
65                  $tmp = {$k => $val};
66                  last;
67                }
68            }
69          push @list_of_hash, $tmp;
70        }
71
72      shift @args; # discard test name
73
74      # skip any test that uses the -z option
75      grep /z/, @args
76        and next;
77
78      push @new, ["$t->[0]-z", '-z', @args, @list_of_hash];
79    }
80  return @new;
81}
82
83my @Tests =
84(
85 ['1', '', {IN=>''}, {OUT=>''}],
86 ['2', '', {IN=>"a\na\n"}, {OUT=>"a\n"}],
87 ['3', '', {IN=>"a\na"}, {OUT=>"a\n"}],
88 ['4', '', {IN=>"a\nb"}, {OUT=>"a\nb\n"}],
89 ['5', '', {IN=>"a\na\nb"}, {OUT=>"a\nb\n"}],
90 ['6', '', {IN=>"b\na\na\n"}, {OUT=>"b\na\n"}],
91 ['7', '', {IN=>"a\nb\nc\n"}, {OUT=>"a\nb\nc\n"}],
92
93 # Ensure that newlines are not interpreted with -z.
94 ['2z', '-z', {IN=>"a\na\n"}, {OUT=>"a\na\n\0"}],
95 ['3z', '-z', {IN=>"a\na"}, {OUT=>"a\na\0"}],
96 ['4z', '-z', {IN=>"a\nb"}, {OUT=>"a\nb\0"}],
97 ['5z', '-z', {IN=>"a\na\nb"}, {OUT=>"a\na\nb\0"}],
98 ['10z', '-z -f1', {IN=>"a\nb\n\0c\nb\n\0"}, {OUT=>"a\nb\n\0"}],
99 ['20z', '-dz', {IN=>"a\na\n"}, {OUT=>""}],
100
101 # Make sure that eight bit characters work
102 ['8', '', {IN=>"ö\nv\n"}, {OUT=>"ö\nv\n"}],
103 # Test output of -u option; only unique lines
104 ['9', '-u', {IN=>"a\na\n"}, {OUT=>""}],
105 ['10', '-u', {IN=>"a\nb\n"}, {OUT=>"a\nb\n"}],
106 ['11', '-u', {IN=>"a\nb\na\n"}, {OUT=>"a\nb\na\n"}],
107 ['12', '-u', {IN=>"a\na\n"}, {OUT=>""}],
108 ['13', '-u', {IN=>"a\na\n"}, {OUT=>""}],
109 #['5',  '-u',  "a\na\n",          "",                0],
110 # Test output of -d option; only repeated lines
111 ['20', '-d', {IN=>"a\na\n"}, {OUT=>"a\n"}],
112 ['21', '-d', {IN=>"a\nb\n"}, {OUT=>""}],
113 ['22', '-d', {IN=>"a\nb\na\n"}, {OUT=>""}],
114 ['23', '-d', {IN=>"a\na\nb\n"}, {OUT=>"a\n"}],
115 # Check the key options
116 # If we skip over fields or characters, is the output deterministic?
117 ['obs30', '-1', {IN=>"a a\nb a\n"}, {OUT=>"a a\n"}],
118 ['31', qw(-f 1), {IN=>"a a\nb a\n"}, {OUT=>"a a\n"}],
119 ['32', qw(-f 1), {IN=>"a a\nb b\n"}, {OUT=>"a a\nb b\n"}],
120 ['33', qw(-f 1), {IN=>"a a a\nb a c\n"}, {OUT=>"a a a\nb a c\n"}],
121 ['34', qw(-f 1), {IN=>"b a\na a\n"}, {OUT=>"b a\n"}],
122 ['35', qw(-f 2), {IN=>"a a c\nb a c\n"}, {OUT=>"a a c\n"}],
123 # Skip over characters.
124 ['obs-plus40', '+1', {IN=>"aaa\naaa\n"}, {OUT=>"aaa\n"}],
125 ['obs-plus41', '+1', {IN=>"baa\naaa\n"}, {OUT=>"baa\n"}],
126 ['42', qw(-s 1), {IN=>"aaa\naaa\n"}, {OUT=>"aaa\n"}],
127 ['43', qw(-s 2), {IN=>"baa\naaa\n"}, {OUT=>"baa\n"}],
128 ['obs-plus44', qw(+1 --), {IN=>"aaa\naaa\n"}, {OUT=>"aaa\n"}],
129 ['obs-plus45', qw(+1 --), {IN=>"baa\naaa\n"}, {OUT=>"baa\n"}],
130 # Skip over fields and characters
131 ['50', qw(-f 1 -s 1), {IN=>"a aaa\nb ab\n"}, {OUT=>"a aaa\nb ab\n"}],
132 ['51', qw(-f 1 -s 1), {IN=>"a aaa\nb aaa\n"}, {OUT=>"a aaa\n"}],
133 ['52', qw(-s 1 -f 1), {IN=>"a aaa\nb ab\n"}, {OUT=>"a aaa\nb ab\n"}],
134 ['53', qw(-s 1 -f 1), {IN=>"a aaa\nb aaa\n"}, {OUT=>"a aaa\n"}],
135 # Fixed in 2.0.15
136 ['54', qw(-s 4), {IN=>"abc\nabcd\n"}, {OUT=>"abc\n"}],
137 # Supported in 2.0.15
138 ['55', qw(-s 0), {IN=>"abc\nabcd\n"}, {OUT=>"abc\nabcd\n"}],
139 ['56', qw(-s 0), {IN=>"abc\n"}, {OUT=>"abc\n"}],
140 ['57', qw(-w 0), {IN=>"abc\nabcd\n"}, {OUT=>"abc\n"}],
141 # Only account for a number of characters
142 ['60', qw(-w 1), {IN=>"a a\nb a\n"}, {OUT=>"a a\nb a\n"}],
143 ['61', qw(-w 3), {IN=>"a a\nb a\n"}, {OUT=>"a a\nb a\n"}],
144 ['62', qw(-w 1 -f 1), {IN=>"a a a\nb a c\n"}, {OUT=>"a a a\n"}],
145 ['63', qw(-f 1 -w 1), {IN=>"a a a\nb a c\n"}, {OUT=>"a a a\n"}],
146 # The blank after field one is checked too
147 ['64', qw(-f 1 -w 4), {IN=>"a a a\nb a c\n"}, {OUT=>"a a a\nb a c\n"}],
148 ['65', qw(-f 1 -w 3), {IN=>"a a a\nb a c\n"}, {OUT=>"a a a\n"}],
149 # Make sure we don't break if the file contains \0
150 ['90', '', {IN=>"a\0a\na\n"}, {OUT=>"a\0a\na\n"}],
151 # Check fields separated by tabs and by spaces
152 ['91', '', {IN=>"a\ta\na a\n"}, {OUT=>"a\ta\na a\n"}],
153 ['92', qw(-f 1), {IN=>"a\ta\na a\n"}, {OUT=>"a\ta\na a\n"}],
154 ['93', qw(-f 2), {IN=>"a\ta a\na a a\n"}, {OUT=>"a\ta a\n"}],
155 ['94', qw(-f 1), {IN=>"a\ta\na\ta\n"}, {OUT=>"a\ta\n"}],
156 # Check the count option; add tests for other options too
157 ['101', '-c', {IN=>"a\nb\n"}, {OUT=>"      1 a\n      1 b\n"}],
158 ['102', '-c', {IN=>"a\na\n"}, {OUT=>"      2 a\n"}],
159 # Check the local -D (--all-repeated) option
160 ['110', '-D', {IN=>"a\na\n"}, {OUT=>"a\na\n"}],
161 ['111', qw(-D -w1), {IN=>"a a\na b\n"}, {OUT=>"a a\na b\n"}],
162 ['112', qw(-D -c), {IN=>"a a\na b\n"}, {OUT=>""}, {EXIT=>1}, {ERR=>
163  "$prog: printing all duplicated lines and repeat counts is meaningless\n$try"}
164  ],
165 ['113', '--all-repeated=separate', {IN=>"a\na\n"}, {OUT=>"a\na\n"}],
166 ['114', '--all-repeated=separate',
167  {IN=>"a\na\nb\nc\nc\n"}, {OUT=>"a\na\n\nc\nc\n"}],
168 ['115', '--all-repeated=separate',
169  {IN=>"a\na\nb\nb\nc\n"}, {OUT=>"a\na\n\nb\nb\n"}],
170 ['116', '--all-repeated=prepend', {IN=>"a\na\n"}, {OUT=>"\na\na\n"}],
171 ['117', '--all-repeated=prepend',
172  {IN=>"a\na\nb\nc\nc\n"}, {OUT=>"\na\na\n\nc\nc\n"}],
173 ['118', '--all-repeated=prepend', {IN=>"a\nb\n"}, {OUT=>""}],
174 ['119', '--all-repeated=badoption', {IN=>"a\n"}, {OUT=>""}, {EXIT=>1},
175  {ERR=>"$prog: invalid argument 'badoption' for '--all-repeated'\n"
176        . "Valid arguments are:\n"
177        . "  - 'none'\n"
178        . "  - 'prepend'\n"
179        . "  - 'separate'\n"
180        . $try}],
181 # Check that -d and -u suppress all output, as POSIX requires.
182 ['120', qw(-d -u), {IN=>"a\na\n\b"}, {OUT=>""}],
183 ['121', "-d -u -w$limits->{UINTMAX_OFLOW}", {IN=>"a\na\n\b"}, {OUT=>""}],
184 ['122', "-d -u -w$limits->{SIZE_OFLOW}", {IN=>"a\na\n\b"}, {OUT=>""}],
185 # Check that --zero-terminated is synonymous with -z.
186 ['123', '--zero-terminated', {IN=>"a\na\nb"}, {OUT=>"a\na\nb\0"}],
187 ['124', '--zero-terminated', {IN=>"a\0a\0b"}, {OUT=>"a\0b\0"}],
188 # Check ignore-case
189 ['125', '',              {IN=>"A\na\n"}, {OUT=>"A\na\n"}],
190 ['126', '-i',            {IN=>"A\na\n"}, {OUT=>"A\n"}],
191 ['127', '--ignore-case', {IN=>"A\na\n"}, {OUT=>"A\n"}],
192 # Check grouping
193 ['128', '--group=prepend', {IN=>"a\na\nb\n"}, {OUT=>"\na\na\n\nb\n"}],
194 ['129', '--group=append',  {IN=>"a\na\nb\n"}, {OUT=>"a\na\n\nb\n\n"}],
195 ['130', '--group=separate',{IN=>"a\na\nb\n"}, {OUT=>"a\na\n\nb\n"}],
196 # no explicit grouping = separate
197 ['131', '--group',         {IN=>"a\na\nb\n"}, {OUT=>"a\na\n\nb\n"}],
198 ['132', '--group=both',    {IN=>"a\na\nb\n"}, {OUT=>"\na\na\n\nb\n\n"}],
199 # Grouping in the special case of a single group
200 ['133', '--group=prepend', {IN=>"a\na\n"}, {OUT=>"\na\na\n"}],
201 ['134', '--group=append',  {IN=>"a\na\n"}, {OUT=>"a\na\n\n"}],
202 ['135', '--group=separate',{IN=>"a\na\n"}, {OUT=>"a\na\n"}],
203 ['136', '--group',         {IN=>"a\na\n"}, {OUT=>"a\na\n"}],
204 # Grouping with empty input - should never print anything
205 ['137', '--group=prepend',  {IN=>""}, {OUT=>""}],
206 ['138', '--group=append',   {IN=>""}, {OUT=>""}],
207 ['139', '--group=separate', {IN=>""}, {OUT=>""}],
208 ['140', '--group=both',     {IN=>""}, {OUT=>""}],
209 # Grouping with other options - must fail
210 ['141', '--group -c',       {IN=>""}, {OUT=>""}, {EXIT=>1},
211  {ERR=>"$prog: --group is mutually exclusive with -c/-d/-D/-u\n" .
212        "Try 'uniq --help' for more information.\n"}],
213 ['142', '--group -d',       {IN=>""}, {OUT=>""}, {EXIT=>1},
214  {ERR=>"$prog: --group is mutually exclusive with -c/-d/-D/-u\n" .
215        "Try 'uniq --help' for more information.\n"}],
216 ['143', '--group -u',       {IN=>""}, {OUT=>""}, {EXIT=>1},
217  {ERR=>"$prog: --group is mutually exclusive with -c/-d/-D/-u\n" .
218        "Try 'uniq --help' for more information.\n"}],
219 ['144', '--group -D',       {IN=>""}, {OUT=>""}, {EXIT=>1},
220  {ERR=>"$prog: --group is mutually exclusive with -c/-d/-D/-u\n" .
221        "Try 'uniq --help' for more information.\n"}],
222 # Grouping with badoption
223 ['145', '--group=badoption',{IN=>""}, {OUT=>""}, {EXIT=>1},
224  {ERR=>"$prog: invalid argument 'badoption' for '--group'\n" .
225        "Valid arguments are:\n" .
226        "  - 'prepend'\n" .
227        "  - 'append'\n" .
228        "  - 'separate'\n" .
229        "  - 'both'\n" .
230        "Try '$prog --help' for more information.\n"}],
231);
232
233# Locale related tests
234
235my $locale = $ENV{LOCALE_FR};
236if ( defined $locale && $locale ne 'none' )
237  {
238    # I've only ever triggered the problem in a non-C locale.
239
240    # See if isblank returns true for nbsp.
241    my $x = qx!env printf '\xa0'| LC_ALL=$locale tr '[:blank:]' x!;
242    # If so, expect just one line of output in the schar test.
243    # Otherwise, expect two.
244    my $in = " y z\n\xa0 y z\n";
245    my $schar_exp = $x eq 'x' ? " y z\n" : $in;
246
247    my @Locale_Tests =
248    (
249      # Test for a subtle, system-and-locale-dependent bug in uniq.
250      ['schar', '-f1',  {IN => $in}, {OUT => $schar_exp},
251        {ENV => "LC_ALL=$locale"}]
252    );
253
254    push @Tests, @Locale_Tests;
255  }
256
257
258# Set _POSIX2_VERSION=199209 in the environment of each obs-plus* test.
259foreach my $t (@Tests)
260  {
261    $t->[0] =~ /^obs-plus/
262      and push @$t, {ENV=>'_POSIX2_VERSION=199209'};
263  }
264
265@Tests = add_z_variants \@Tests;
266@Tests = triple_test \@Tests;
267
268my $save_temps = $ENV{DEBUG};
269my $verbose = $ENV{VERBOSE};
270
271my $fail = run_tests ($prog, $prog, \@Tests, $save_temps, $verbose);
272exit $fail;
273