1#!/usr/bin/perl 2# Test uniq. 3 4# Copyright (C) 2008-2023 Free Software Foundation, Inc. 5 6# This program is free software: you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation, either version 3 of the License, or 9# (at your option) any later version. 10 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15 16# You should have received a copy of the GNU General Public License 17# along with this program. If not, see <https://www.gnu.org/licenses/>. 18 19use strict; 20 21my $limits = getlimits (); 22 23my $prog = 'uniq'; 24my $try = "Try '$prog --help' for more information.\n"; 25 26# Turn off localization of executable's output. 27@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; 28 29# When possible, create a "-z"-testing variant of each test. 30sub add_z_variants($) 31{ 32 my ($tests) = @_; 33 my @new; 34 TEST: 35 foreach my $t (@$tests) 36 { 37 push @new, $t; 38 39 # skip the obsolete-syntax tests 40 $t->[0] =~ /^obs-plus/ 41 and next; 42 43 my @args; 44 my @list_of_hash; 45 46 foreach my $e (@$t) 47 { 48 !ref $e 49 and push (@args, $e), next; 50 51 ref $e && ref $e eq 'HASH' 52 or (warn "$0: $t->[0]: unexpected entry type\n"), next; 53 my $tmp = $e; 54 foreach my $k (qw(IN OUT)) 55 { 56 my $val = $e->{$k}; 57 # skip any test whose input or output already contains a NUL byte 58 if (defined $val) 59 { 60 $val =~ /\0/ 61 and next TEST; 62 63 # Convert each NL in input or output to \0. 64 $val =~ s/\n/\0/g; 65 $tmp = {$k => $val}; 66 last; 67 } 68 } 69 push @list_of_hash, $tmp; 70 } 71 72 shift @args; # discard test name 73 74 # skip any test that uses the -z option 75 grep /z/, @args 76 and next; 77 78 push @new, ["$t->[0]-z", '-z', @args, @list_of_hash]; 79 } 80 return @new; 81} 82 83my @Tests = 84( 85 ['1', '', {IN=>''}, {OUT=>''}], 86 ['2', '', {IN=>"a\na\n"}, {OUT=>"a\n"}], 87 ['3', '', {IN=>"a\na"}, {OUT=>"a\n"}], 88 ['4', '', {IN=>"a\nb"}, {OUT=>"a\nb\n"}], 89 ['5', '', {IN=>"a\na\nb"}, {OUT=>"a\nb\n"}], 90 ['6', '', {IN=>"b\na\na\n"}, {OUT=>"b\na\n"}], 91 ['7', '', {IN=>"a\nb\nc\n"}, {OUT=>"a\nb\nc\n"}], 92 93 # Ensure that newlines are not interpreted with -z. 94 ['2z', '-z', {IN=>"a\na\n"}, {OUT=>"a\na\n\0"}], 95 ['3z', '-z', {IN=>"a\na"}, {OUT=>"a\na\0"}], 96 ['4z', '-z', {IN=>"a\nb"}, {OUT=>"a\nb\0"}], 97 ['5z', '-z', {IN=>"a\na\nb"}, {OUT=>"a\na\nb\0"}], 98 ['10z', '-z -f1', {IN=>"a\nb\n\0c\nb\n\0"}, {OUT=>"a\nb\n\0"}], 99 ['20z', '-dz', {IN=>"a\na\n"}, {OUT=>""}], 100 101 # Make sure that eight bit characters work 102 ['8', '', {IN=>"ö\nv\n"}, {OUT=>"ö\nv\n"}], 103 # Test output of -u option; only unique lines 104 ['9', '-u', {IN=>"a\na\n"}, {OUT=>""}], 105 ['10', '-u', {IN=>"a\nb\n"}, {OUT=>"a\nb\n"}], 106 ['11', '-u', {IN=>"a\nb\na\n"}, {OUT=>"a\nb\na\n"}], 107 ['12', '-u', {IN=>"a\na\n"}, {OUT=>""}], 108 ['13', '-u', {IN=>"a\na\n"}, {OUT=>""}], 109 #['5', '-u', "a\na\n", "", 0], 110 # Test output of -d option; only repeated lines 111 ['20', '-d', {IN=>"a\na\n"}, {OUT=>"a\n"}], 112 ['21', '-d', {IN=>"a\nb\n"}, {OUT=>""}], 113 ['22', '-d', {IN=>"a\nb\na\n"}, {OUT=>""}], 114 ['23', '-d', {IN=>"a\na\nb\n"}, {OUT=>"a\n"}], 115 # Check the key options 116 # If we skip over fields or characters, is the output deterministic? 117 ['obs30', '-1', {IN=>"a a\nb a\n"}, {OUT=>"a a\n"}], 118 ['31', qw(-f 1), {IN=>"a a\nb a\n"}, {OUT=>"a a\n"}], 119 ['32', qw(-f 1), {IN=>"a a\nb b\n"}, {OUT=>"a a\nb b\n"}], 120 ['33', qw(-f 1), {IN=>"a a a\nb a c\n"}, {OUT=>"a a a\nb a c\n"}], 121 ['34', qw(-f 1), {IN=>"b a\na a\n"}, {OUT=>"b a\n"}], 122 ['35', qw(-f 2), {IN=>"a a c\nb a c\n"}, {OUT=>"a a c\n"}], 123 # Skip over characters. 124 ['obs-plus40', '+1', {IN=>"aaa\naaa\n"}, {OUT=>"aaa\n"}], 125 ['obs-plus41', '+1', {IN=>"baa\naaa\n"}, {OUT=>"baa\n"}], 126 ['42', qw(-s 1), {IN=>"aaa\naaa\n"}, {OUT=>"aaa\n"}], 127 ['43', qw(-s 2), {IN=>"baa\naaa\n"}, {OUT=>"baa\n"}], 128 ['obs-plus44', qw(+1 --), {IN=>"aaa\naaa\n"}, {OUT=>"aaa\n"}], 129 ['obs-plus45', qw(+1 --), {IN=>"baa\naaa\n"}, {OUT=>"baa\n"}], 130 # Skip over fields and characters 131 ['50', qw(-f 1 -s 1), {IN=>"a aaa\nb ab\n"}, {OUT=>"a aaa\nb ab\n"}], 132 ['51', qw(-f 1 -s 1), {IN=>"a aaa\nb aaa\n"}, {OUT=>"a aaa\n"}], 133 ['52', qw(-s 1 -f 1), {IN=>"a aaa\nb ab\n"}, {OUT=>"a aaa\nb ab\n"}], 134 ['53', qw(-s 1 -f 1), {IN=>"a aaa\nb aaa\n"}, {OUT=>"a aaa\n"}], 135 # Fixed in 2.0.15 136 ['54', qw(-s 4), {IN=>"abc\nabcd\n"}, {OUT=>"abc\n"}], 137 # Supported in 2.0.15 138 ['55', qw(-s 0), {IN=>"abc\nabcd\n"}, {OUT=>"abc\nabcd\n"}], 139 ['56', qw(-s 0), {IN=>"abc\n"}, {OUT=>"abc\n"}], 140 ['57', qw(-w 0), {IN=>"abc\nabcd\n"}, {OUT=>"abc\n"}], 141 # Only account for a number of characters 142 ['60', qw(-w 1), {IN=>"a a\nb a\n"}, {OUT=>"a a\nb a\n"}], 143 ['61', qw(-w 3), {IN=>"a a\nb a\n"}, {OUT=>"a a\nb a\n"}], 144 ['62', qw(-w 1 -f 1), {IN=>"a a a\nb a c\n"}, {OUT=>"a a a\n"}], 145 ['63', qw(-f 1 -w 1), {IN=>"a a a\nb a c\n"}, {OUT=>"a a a\n"}], 146 # The blank after field one is checked too 147 ['64', qw(-f 1 -w 4), {IN=>"a a a\nb a c\n"}, {OUT=>"a a a\nb a c\n"}], 148 ['65', qw(-f 1 -w 3), {IN=>"a a a\nb a c\n"}, {OUT=>"a a a\n"}], 149 # Make sure we don't break if the file contains \0 150 ['90', '', {IN=>"a\0a\na\n"}, {OUT=>"a\0a\na\n"}], 151 # Check fields separated by tabs and by spaces 152 ['91', '', {IN=>"a\ta\na a\n"}, {OUT=>"a\ta\na a\n"}], 153 ['92', qw(-f 1), {IN=>"a\ta\na a\n"}, {OUT=>"a\ta\na a\n"}], 154 ['93', qw(-f 2), {IN=>"a\ta a\na a a\n"}, {OUT=>"a\ta a\n"}], 155 ['94', qw(-f 1), {IN=>"a\ta\na\ta\n"}, {OUT=>"a\ta\n"}], 156 # Check the count option; add tests for other options too 157 ['101', '-c', {IN=>"a\nb\n"}, {OUT=>" 1 a\n 1 b\n"}], 158 ['102', '-c', {IN=>"a\na\n"}, {OUT=>" 2 a\n"}], 159 # Check the local -D (--all-repeated) option 160 ['110', '-D', {IN=>"a\na\n"}, {OUT=>"a\na\n"}], 161 ['111', qw(-D -w1), {IN=>"a a\na b\n"}, {OUT=>"a a\na b\n"}], 162 ['112', qw(-D -c), {IN=>"a a\na b\n"}, {OUT=>""}, {EXIT=>1}, {ERR=> 163 "$prog: printing all duplicated lines and repeat counts is meaningless\n$try"} 164 ], 165 ['113', '--all-repeated=separate', {IN=>"a\na\n"}, {OUT=>"a\na\n"}], 166 ['114', '--all-repeated=separate', 167 {IN=>"a\na\nb\nc\nc\n"}, {OUT=>"a\na\n\nc\nc\n"}], 168 ['115', '--all-repeated=separate', 169 {IN=>"a\na\nb\nb\nc\n"}, {OUT=>"a\na\n\nb\nb\n"}], 170 ['116', '--all-repeated=prepend', {IN=>"a\na\n"}, {OUT=>"\na\na\n"}], 171 ['117', '--all-repeated=prepend', 172 {IN=>"a\na\nb\nc\nc\n"}, {OUT=>"\na\na\n\nc\nc\n"}], 173 ['118', '--all-repeated=prepend', {IN=>"a\nb\n"}, {OUT=>""}], 174 ['119', '--all-repeated=badoption', {IN=>"a\n"}, {OUT=>""}, {EXIT=>1}, 175 {ERR=>"$prog: invalid argument 'badoption' for '--all-repeated'\n" 176 . "Valid arguments are:\n" 177 . " - 'none'\n" 178 . " - 'prepend'\n" 179 . " - 'separate'\n" 180 . $try}], 181 # Check that -d and -u suppress all output, as POSIX requires. 182 ['120', qw(-d -u), {IN=>"a\na\n\b"}, {OUT=>""}], 183 ['121', "-d -u -w$limits->{UINTMAX_OFLOW}", {IN=>"a\na\n\b"}, {OUT=>""}], 184 ['122', "-d -u -w$limits->{SIZE_OFLOW}", {IN=>"a\na\n\b"}, {OUT=>""}], 185 # Check that --zero-terminated is synonymous with -z. 186 ['123', '--zero-terminated', {IN=>"a\na\nb"}, {OUT=>"a\na\nb\0"}], 187 ['124', '--zero-terminated', {IN=>"a\0a\0b"}, {OUT=>"a\0b\0"}], 188 # Check ignore-case 189 ['125', '', {IN=>"A\na\n"}, {OUT=>"A\na\n"}], 190 ['126', '-i', {IN=>"A\na\n"}, {OUT=>"A\n"}], 191 ['127', '--ignore-case', {IN=>"A\na\n"}, {OUT=>"A\n"}], 192 # Check grouping 193 ['128', '--group=prepend', {IN=>"a\na\nb\n"}, {OUT=>"\na\na\n\nb\n"}], 194 ['129', '--group=append', {IN=>"a\na\nb\n"}, {OUT=>"a\na\n\nb\n\n"}], 195 ['130', '--group=separate',{IN=>"a\na\nb\n"}, {OUT=>"a\na\n\nb\n"}], 196 # no explicit grouping = separate 197 ['131', '--group', {IN=>"a\na\nb\n"}, {OUT=>"a\na\n\nb\n"}], 198 ['132', '--group=both', {IN=>"a\na\nb\n"}, {OUT=>"\na\na\n\nb\n\n"}], 199 # Grouping in the special case of a single group 200 ['133', '--group=prepend', {IN=>"a\na\n"}, {OUT=>"\na\na\n"}], 201 ['134', '--group=append', {IN=>"a\na\n"}, {OUT=>"a\na\n\n"}], 202 ['135', '--group=separate',{IN=>"a\na\n"}, {OUT=>"a\na\n"}], 203 ['136', '--group', {IN=>"a\na\n"}, {OUT=>"a\na\n"}], 204 # Grouping with empty input - should never print anything 205 ['137', '--group=prepend', {IN=>""}, {OUT=>""}], 206 ['138', '--group=append', {IN=>""}, {OUT=>""}], 207 ['139', '--group=separate', {IN=>""}, {OUT=>""}], 208 ['140', '--group=both', {IN=>""}, {OUT=>""}], 209 # Grouping with other options - must fail 210 ['141', '--group -c', {IN=>""}, {OUT=>""}, {EXIT=>1}, 211 {ERR=>"$prog: --group is mutually exclusive with -c/-d/-D/-u\n" . 212 "Try 'uniq --help' for more information.\n"}], 213 ['142', '--group -d', {IN=>""}, {OUT=>""}, {EXIT=>1}, 214 {ERR=>"$prog: --group is mutually exclusive with -c/-d/-D/-u\n" . 215 "Try 'uniq --help' for more information.\n"}], 216 ['143', '--group -u', {IN=>""}, {OUT=>""}, {EXIT=>1}, 217 {ERR=>"$prog: --group is mutually exclusive with -c/-d/-D/-u\n" . 218 "Try 'uniq --help' for more information.\n"}], 219 ['144', '--group -D', {IN=>""}, {OUT=>""}, {EXIT=>1}, 220 {ERR=>"$prog: --group is mutually exclusive with -c/-d/-D/-u\n" . 221 "Try 'uniq --help' for more information.\n"}], 222 # Grouping with badoption 223 ['145', '--group=badoption',{IN=>""}, {OUT=>""}, {EXIT=>1}, 224 {ERR=>"$prog: invalid argument 'badoption' for '--group'\n" . 225 "Valid arguments are:\n" . 226 " - 'prepend'\n" . 227 " - 'append'\n" . 228 " - 'separate'\n" . 229 " - 'both'\n" . 230 "Try '$prog --help' for more information.\n"}], 231); 232 233# Locale related tests 234 235my $locale = $ENV{LOCALE_FR}; 236if ( defined $locale && $locale ne 'none' ) 237 { 238 # I've only ever triggered the problem in a non-C locale. 239 240 # See if isblank returns true for nbsp. 241 my $x = qx!env printf '\xa0'| LC_ALL=$locale tr '[:blank:]' x!; 242 # If so, expect just one line of output in the schar test. 243 # Otherwise, expect two. 244 my $in = " y z\n\xa0 y z\n"; 245 my $schar_exp = $x eq 'x' ? " y z\n" : $in; 246 247 my @Locale_Tests = 248 ( 249 # Test for a subtle, system-and-locale-dependent bug in uniq. 250 ['schar', '-f1', {IN => $in}, {OUT => $schar_exp}, 251 {ENV => "LC_ALL=$locale"}] 252 ); 253 254 push @Tests, @Locale_Tests; 255 } 256 257 258# Set _POSIX2_VERSION=199209 in the environment of each obs-plus* test. 259foreach my $t (@Tests) 260 { 261 $t->[0] =~ /^obs-plus/ 262 and push @$t, {ENV=>'_POSIX2_VERSION=199209'}; 263 } 264 265@Tests = add_z_variants \@Tests; 266@Tests = triple_test \@Tests; 267 268my $save_temps = $ENV{DEBUG}; 269my $verbose = $ENV{VERBOSE}; 270 271my $fail = run_tests ($prog, $prog, \@Tests, $save_temps, $verbose); 272exit $fail; 273