1#!/usr/bin/perl 2# Test join. 3 4# Copyright (C) 2008-2023 Free Software Foundation, Inc. 5 6# This program is free software: you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation, either version 3 of the License, or 9# (at your option) any later version. 10 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15 16# You should have received a copy of the GNU General Public License 17# along with this program. If not, see <https://www.gnu.org/licenses/>. 18 19use strict; 20 21my $limits = getlimits (); 22 23# Turn off localization of executable's output. 24@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; 25 26my $prog = 'join'; 27 28my $delim = chr 0247; 29sub t_subst ($) 30{ 31 (my $s = $_[0]) =~ s/:/$delim/g; 32 return $s; 33} 34 35my @tv = ( 36# test name 37# flags file-1 file-2 expected output expected return code 38# 39['1a', '-a1', ["a 1\n", "b\n"], "a 1\n", 0], 40['1b', '-a2', ["a 1\n", "b\n"], "b\n", 0], # Got "\n" 41['1c', '-a1 -a2', ["a 1\n", "b\n"], "a 1\nb\n", 0], # Got "a 1\n\n" 42['1d', '-a1', ["a 1\nb\n", "b\n"], "a 1\nb\n", 0], 43['1e', '-a2', ["a 1\nb\n", "b\n"], "b\n", 0], 44['1f', '-a2', ["b\n", "a\nb\n"], "a\nb\n", 0], 45 46['2a', '-a1 -e .', ["a\nb\nc\n", "a x y\nb\nc\n"], "a x y\nb\nc\n", 0], 47['2b', '-a1 -e . -o 2.1,2.2,2.3', ["a\nb\nc\n", "a x y\nb\nc\n"], 48 "a x y\nb . .\nc . .\n", 0], 49['2c', '-a1 -e . -o 2.1,2.2,2.3', ["a\nb\nc\nd\n", "a x y\nb\nc\n"], 50 "a x y\nb . .\nc . .\n. . .\n", 0], 51 52['3a', '-t:', ["a:1\nb:1\n", "a:2:\nb:2:\n"], "a:1:2:\nb:1:2:\n", 0], 53 54# operate on whole line (as sort does by default) 55['3b', '-t ""', ["a 1\nb 1\n", "a 1\nb 2\n"], "a 1\n", 0], 56# use NUL as the field delimiter 57['3c', '-t "\\0"', ["a\0a\n", "a\0b\n"], "a\0a\0b\n", 0], 58 59# Just like -a1 and -a2 when there are no pairable lines 60['4a', '-v 1', ["a 1\n", "b\n"], "a 1\n", 0], 61['4b', '-v 2', ["a 1\n", "b\n"], "b\n", 0], 62 63['4c', '-v 1', ["a 1\nb\n", "b\n"], "a 1\n", 0], 64['4d', '-v 2', ["a 1\nb\n", "b\n"], "", 0], 65['4e', '-v 2', ["b\n", "a 1\nb\n"], "a 1\n", 0], 66['5a', '-a1 -e - -o 1.1,2.2', 67 ["a 1\nb 2\n", "a 11\nb\n"], "a 11\nb -\n", 0], 68['5b', '-a1 -e - -o 1.1,2.2', 69 ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\nfeb 15"], 70 "apr 06\naug 14\ndec -\nfeb 15\n", 0], 71['5c', '-a1 -e - -o 1.1,2.2', 72 ["aug 20\ndec 18\n", "aug 14\ndate\nfeb 15"], 73 "aug 14\ndec -\n", 0], 74['5d', '-a1 -e - -o 1.1,2.2', 75 ["dec 18\n", ""], "dec -\n", 0], 76['5e', '-a2 -e - -o 1.1,2.2', 77 ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\nfeb 15\n"], 78 "apr 06\naug 14\n- -\nfeb 15\n", 0], 79['5f', '-a2 -e - -o 2.2,1.1', 80 ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\nfeb 15\n"], 81 "06 apr\n14 aug\n- -\n15 feb\n", 0], 82['5g', '-a1 -e - -o 2.2,1.1', 83 ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\nfeb 15\n"], 84 "06 apr\n14 aug\n- dec\n15 feb\n", 0], 85 86['5h', '-a1 -e - -o 2.2,1.1', 87 ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\n"], 88 "06 apr\n14 aug\n- dec\n- feb\n", 0], 89['5i', '-a1 -e - -o 1.1,2.2', 90 ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\n"], 91 "apr 06\naug 14\ndec -\nfeb -\n", 0], 92 93['5j', '-a2 -e - -o 2.2,1.1', 94 ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\n"], 95 "06 apr\n14 aug\n- -\n", 0], 96['5k', '-a2 -e - -o 2.2,1.1', 97 ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\n"], 98 "06 apr\n14 aug\n- -\n", 0], 99 100['5l', '-a1 -e - -o 2.2,1.1', 101 ["apr 15\naug 20\ndec 18\n", "apr 06\naug 14\ndate\nfeb 15\n"], 102 "06 apr\n14 aug\n- dec\n", 0], 103['5m', '-a2 -e - -o 2.2,1.1', 104 ["apr 15\naug 20\ndec 18\n", "apr 06\naug 14\ndate\nfeb 15\n"], 105 "06 apr\n14 aug\n- -\n15 -\n", 0], 106 107['6a', '-e -', 108 ["a 1\nb 2\nd 4\n", "a 21\nb 22\nc 23\nf 26\n"], 109 "a 1 21\nb 2 22\n", 0], 110['6b', '-a1 -e -', 111 ["a 1\nb 2\nd 4\n", "a 21\nb 22\nc 23\nf 26\n"], 112 "a 1 21\nb 2 22\nd 4\n", 0], 113['6c', '-a1 -e -', 114 ["a 21\nb 22\nc 23\nf 26\n", "a 1\nb 2\nd 4\n"], 115 "a 21 1\nb 22 2\nc 23\nf 26\n", 0], 116 117['7a', '-a1 -e . -o 2.7', 118 ["a\nb\nc\n", "a x y\nb\nc\n"], ".\n.\n.\n", 0], 119 120['8a', '-a1 -e . -o 0,1.2', 121 ["a\nb\nc\nd G\n", "a x y\nb\nc\ne\n"], 122 "a .\nb .\nc .\nd G\n", 0], 123['8b', '-a1 -a2 -e . -o 0,1.2', 124 ["a\nb\nc\nd G\n", "a x y\nb\nc\ne\n"], 125 "a .\nb .\nc .\nd G\ne .\n", 0], 126 127# From David Dyck 128['9a', '', [" a 1\n b 2\n", " a Y\n b Z\n"], "a 1 Y\nb 2 Z\n", 0], 129 130# -o 'auto' 131['10a', '-a1 -a2 -e . -o auto', 132 ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"], 133 "a 1 2 3 4\nb 1 . 3 4\nc . . 3 4\nd 1 2 . .\n", 0], 134['10b', '-a1 -a2 -j3 -e . -o auto', 135 ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"], 136 "2 a 1 . .\n. b 1 . .\n2 d 1 . .\n4 . . a 3\n4 . . b 3\n4 . . c 3\n"], 137['10c', '-a1 -1 1 -2 4 -e. -o auto', 138 ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"], 139 "a 1 2 . . .\nb 1 . . . .\nd 1 2 . . .\n"], 140['10d', '-a2 -1 1 -2 4 -e. -o auto', 141 ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"], 142 ". . . a 3 4\n. . . b 3 4\n. . . c 3 4\n"], 143['10e', '-o auto', 144 ["a 1 2\nb 1 2 discard\n", "a 3 4\nb 3 4 discard\n"], 145 "a 1 2 3 4\nb 1 2 3 4\n"], 146['10f', '-t, -o auto', 147 ["a,1,,2\nb,1,2\n", "a,3,4\nb,3,4\n"], 148 "a,1,,2,3,4\nb,1,2,,3,4\n"], 149 150# For -v2, print the match field correctly with the default output format, 151# when that match field is different between file 1 and file 2. Fixed in 8.10 152['v2-order', '-v2 -2 2', ["", "2 1\n"], "1 2\n", 0], 153 154# From Tim Smithers: fixed in 1.22l 155['trailing-sp', '-t: -1 1 -2 1', ["a:x \n", "a:y \n"], "a:x :y \n", 0], 156 157# From Paul Eggert: fixed in 1.22n 158['sp-vs-blank', '', ["\f 1\n", "\f 2\n"], "\f 1 2\n", 0], 159 160# From Paul Eggert: fixed in 1.22n (this would fail on Solaris7, 161# with LC_ALL set to en_US). 162# Unfortunately, that Solaris7's en_US locale folds case (making 163# the first input file sorted) is not portable, so this test would 164# fail on e.g. Linux systems, because the input to join isn't sorted. 165# ['lc-collate', '', ["a 1a\nB 1B\n", "B 2B\n"], "B 1B 2B\n", 0], 166 167# Based on a report from Antonio Rendas. Fixed in 2.0.9. 168['8-bit-t', t_subst "-t:", 169 [t_subst "a:1\nb:1\n", t_subst "a:2:\nb:2:\n"], 170 t_subst "a:1:2:\nb:1:2:\n", 0], 171 172# fields > SIZE_MAX are silently interpreted as SIZE_MAX 173['bigfield1', "-1 $limits->{UINTMAX_OFLOW} -2 2", 174 ["a\n", "b\n"], " a b\n", 0], 175['bigfield2', "-1 $limits->{SIZE_OFLOW} -2 2", 176 ["a\n", "b\n"], " a b\n", 0], 177 178# FIXME: change this to ensure the diagnostic makes sense 179['invalid-j', '-j x', ["", ""], "", 1, 180 "$prog: invalid field number: 'x'\n"], 181 182# With ordering check, inputs in order 183['chkodr-1', '--check-order', 184 [" a 1\n b 2\n", " a Y\n b Z\n"], "a 1 Y\nb 2 Z\n", 0], 185 186# Without check, inputs in order 187['chkodr-2', '--nocheck-order', 188 [" a 1\n b 2\n", " a Y\n b Z\n"], "a 1 Y\nb 2 Z\n", 0], 189 190# Without check, both inputs out of order (in fact, in reverse order) 191# but all pairable. Support for this is a GNU extension. 192['chkodr-3', '--nocheck-order', 193 [" b 1\n a 2\n", " b Y\n a Z\n"], "b 1 Y\na 2 Z\n", 0], 194 195# The extension should work without --nocheck-order, since that is the 196# default. 197['chkodr-4', '', 198 [" b 1\n a 2\n", " b Y\n a Z\n"], "b 1 Y\na 2 Z\n", 0], 199 200# With check, both inputs out of order (in fact, in reverse order) 201['chkodr-5', '--check-order', 202 [" b 1\n a 2\n", " b Y\n a Z\n"], "", 1, 203 "$prog: chkodr-5.1:2: is not sorted: a 2\n"], 204 205# Similar, but with only file 2 not sorted. 206['chkodr-5b', '--check-order', 207 [" a 2\n b 1\n", " b Y\n a Z\n"], "", 1, 208 "$prog: chkodr-5b.2:2: is not sorted: a Z\n"], 209 210# Similar, but with the offending line having length 0 (excluding newline). 211['chkodr-5c', '--check-order', 212 [" a 2\n b 1\n", " b Y\n\n"], "", 1, 213 "$prog: chkodr-5c.2:2: is not sorted: \n"], 214 215# Similar, but elicit a warning for each input file (without --check-order). 216['chkodr-5d', '', 217 ["a\nx\n\n", "b\ny\n\n"], "", 1, 218 "$prog: chkodr-5d.1:3: is not sorted: \n" . 219 "$prog: chkodr-5d.2:3: is not sorted: \n" . 220 "$prog: input is not in sorted order\n" 221 ], 222 223# Similar, but make it so each offending line has no newline. 224['chkodr-5e', '', 225 ["a\nx\no", "b\ny\np"], "", 1, 226 "$prog: chkodr-5e.1:3: is not sorted: o\n" . 227 "$prog: chkodr-5e.2:3: is not sorted: p\n" . 228 "$prog: input is not in sorted order\n" 229 ], 230 231# Without order check, both inputs out of order and some lines 232# unpairable. This is NOT supported by the GNU extension. All that 233# we really care about for this test is that the return status is 234# zero, since that is the only way to actually verify that the 235# --nocheck-order option had any effect. We don't actually want to 236# guarantee that join produces this output on stdout. 237['chkodr-6', '--nocheck-order', 238 [" b 1\n a 2\n", " b Y\n c Z\n"], "b 1 Y\n", 0], 239 240# Before 6.10.143, this would mistakenly fail with the diagnostic: 241# join: File 1 is not in sorted order 242['chkodr-7', '-12', ["2 a\n1 b\n", "2 c\n1 d"], "", 0], 243 244# After 8.9, join doesn't report disorder by default 245# when comparing against an empty input file. 246['chkodr-8', '', ["2 a\n1 b\n", ""], "", 0], 247 248# Test '--header' feature 249['header-1', '--header', 250 [ "ID Name\n1 A\n2 B\n", "ID Color\n1 red\n"], "ID Name Color\n1 A red\n", 0], 251 252# '--header' with '--check-order' : The header line is out-of-order but the 253# actual data is in order. This join should succeed. 254['header-2', '--header --check-order', 255 ["ID Name\n1 A\n2 B\n", "ID Color\n2 green\n"], 256 "ID Name Color\n2 B green\n", 0], 257 258# '--header' with '--check-order' : The header line is out-of-order AND the 259# actual data out-of-order. This join should fail. 260['header-3', '--header --check-order', 261 ["ID Name\n2 B\n1 A\n", "ID Color\n2 blue\n"], "ID Name Color\n", 1, 262 "$prog: header-3.1:3: is not sorted: 1 A\n"], 263 264# '--header' with specific output format '-o'. 265# output header line should respect the requested format 266['header-4', '--header -o "0,1.3,2.2"', 267 ["ID Group Name\n1 Foo A\n2 Bar B\n", "ID Color\n2 blue\n"], 268 "ID Name Color\n2 B blue\n", 0], 269 270# '--header' always outputs headers from the first file 271# even if the headers from the second file don't match 272['header-5', '--header', 273 [ "ID1 Name\n1 A\n2 B\n", "ID2 Color\n1 red\n"], 274 "ID1 Name Color\n1 A red\n", 0], 275 276# '--header' doesn't check order of a header 277# even if there is no header in the second file 278['header-6', '--header -a1', 279 [ "ID1 Name\n1 A\n", ""], 280 "ID1 Name\n1 A\n", 0], 281 282# Zero-terminated lines 283['z1', '-z', 284 ["a\0c\0e\0", "a\0b\0c\0"], "a\0c\0", 0], 285 286# not zero-terminated, but related to the code change: 287# the old readlinebuffer() auto-added '\n' to the last line. 288# the new readlinebuffer_delim() does not. 289# Ensure it doesn't matter. 290['z2', '', 291 ["a\nc\ne\n", "a\nb\nc"], "a\nc\n", 0], 292['z3', '', 293 ["a\nc\ne", "a\nb\nc"], "a\nc\n", 0], 294# missing last NUL at the end of the last line (=end of file) 295['z4', '-z', 296 ["a\0c\0e", "a\0b\0c"], "a\0c\0", 0], 297# With -z, embedded newlines are treated as field separators. 298# Note '\n' are converted to ' ' in this case. 299['z5', '-z -a1 -a2', 300 ["a\n\n1\0c 3\0", "a 2\0b\n8\0c 9\0"], "a 1 2\0b 8\0c 3 9\0"], 301# One can avoid field processing like: 302['z6', '-z -t ""', 303 ["a\n1\n\0", "a\n1\n\0"], "a\n1\n\0"], 304 305); 306 307# Convert the above old-style test vectors to the newer 308# format used by Coreutils.pm. 309 310my @Tests; 311foreach my $t (@tv) 312 { 313 my ($test_name, $flags, $in, $exp, $ret, $err_msg) = @$t; 314 my $new_ent = [$test_name, $flags]; 315 if (!ref $in) 316 { 317 push @$new_ent, {IN=>$in}; 318 } 319 elsif (ref $in eq 'HASH') 320 { 321 # ignore 322 } 323 else 324 { 325 foreach my $e (@$in) 326 { 327 push @$new_ent, {IN=>$e}; 328 } 329 } 330 push @$new_ent, {OUT=>$exp}; 331 $ret 332 and push @$new_ent, {EXIT=>$ret}, {ERR=>$err_msg}; 333 push @Tests, $new_ent; 334 } 335 336@Tests = triple_test \@Tests; 337 338my $save_temps = $ENV{DEBUG}; 339my $verbose = $ENV{VERBOSE}; 340 341my $fail = run_tests ($prog, $prog, \@Tests, $save_temps, $verbose); 342exit $fail; 343