#!/usr/bin/perl
# Test "unexpand".

# Copyright (C) 2000-2023 Free Software Foundation, Inc.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

use strict;

# getlimits is defined outside this file.  Its result is used below as a
# hash reference; only the UINTMAX_OFLOW key is read (see 'obs-ovflo').
my $limits = getlimits ();

# Name of this script with any leading directory components removed.
(my $program_name = $0) =~ s|.*/||;

# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;

my $prog = 'unexpand';

# Test table.  Each entry is an array reference holding a test name,
# zero or more option strings, and then hashes keyed by IN and OUT
# (plus EXIT and ERR for 'obs-ovflo').
# NOTE(review): the exact meaning of these keys is defined by run_tests,
# which is not part of this file -- presumably input, expected output,
# expected exit status and expected stderr; confirm against the harness.
my @Tests =
    (
     # No options: 1 to 8 leading spaces.
     ['a1', {IN=> ' 'x 1 ."y\n"}, {OUT=> ' 'x 1 ."y\n"}],
     ['a2', {IN=> ' 'x 2 ."y\n"}, {OUT=> ' 'x 2 ."y\n"}],
     ['a3', {IN=> ' 'x 3 ."y\n"}, {OUT=> ' 'x 3 ."y\n"}],
     ['a4', {IN=> ' 'x 4 ."y\n"}, {OUT=> ' 'x 4 ."y\n"}],
     ['a5', {IN=> ' 'x 5 ."y\n"}, {OUT=> ' 'x 5 ."y\n"}],
     ['a6', {IN=> ' 'x 6 ."y\n"}, {OUT=> ' 'x 6 ."y\n"}],
     ['a7', {IN=> ' 'x 7 ."y\n"}, {OUT=> ' 'x 7 ."y\n"}],
     ['a8', {IN=> ' 'x 8 ."y\n"}, {OUT=> "\ty\n"}],

     # With -a: 1 to 8 spaces following a leading 'w'.
     ['aa-1', '-a', {IN=> 'w'.' 'x 1 ."y\n"}, {OUT=> 'w'.' 'x 1 ."y\n"}],
     ['aa-2', '-a', {IN=> 'w'.' 'x 2 ."y\n"}, {OUT=> 'w'.' 'x 2 ."y\n"}],
     ['aa-3', '-a', {IN=> 'w'.' 'x 3 ."y\n"}, {OUT=> 'w'.' 'x 3 ."y\n"}],
     ['aa-4', '-a', {IN=> 'w'.' 'x 4 ."y\n"}, {OUT=> 'w'.' 'x 4 ."y\n"}],
     ['aa-5', '-a', {IN=> 'w'.' 'x 5 ."y\n"}, {OUT=> 'w'.' 'x 5 ."y\n"}],
     ['aa-6', '-a', {IN=> 'w'.' 'x 6 ."y\n"}, {OUT=> 'w'.' 'x 6 ."y\n"}],
     ['aa-7', '-a', {IN=> 'w'.' 'x 7 ."y\n"}, {OUT=> "w\ty\n"}],
     ['aa-8', '-a', {IN=> 'w'.' 'x 8 ."y\n"}, {OUT=> "w\t y\n"}],

     ['b-1', '-t', '2,4', {IN=> "      ."}, {OUT=>"\t\t  ."}],

     # These would infloop prior to textutils-2.0d.
     ['infloop-1', '-t', '1,2', {IN=> " \t\t .\n"}, {OUT=>"\t\t\t .\n"}],
     ['infloop-2', '-t', '4,5', {IN=> ' 'x4 . "\t\t \n"}, {OUT=>"\t\t\t \n"}],
     ['infloop-3', '-t', '2,3', {IN=> "x \t\t \n"}, {OUT=>"x\t\t\t \n"}],
     ['infloop-4', '-t', '1,2', {IN=> " \t\t   \n"}, {OUT=>"\t\t\t   \n"}],
     ['c-1', '-t', '1,2', {IN=> "x\t\t .\n"}, {OUT=>"x\t\t .\n"}],

     # -t implies -a
     # Feature addition (--first-only) prompted by a report from Jie Xu.
     ['tabs-1', qw(-t 3),              {IN=> "   a  b\n"}, {OUT=>"\ta\tb\n"}],
     ['tabs-2', qw(-t 3 --first-only), {IN=> "   a  b\n"}, {OUT=>"\ta  b\n"}],

     # blanks
     ['blanks-1', qw(-t 1), {IN=> " b  c   d\n"}, {OUT=> "\tb\t\tc\t\t\td\n"}],
     ['blanks-2', qw(-t 1), {IN=> "a \n"}, {OUT=> "a \n"}],
     ['blanks-3', qw(-t 1), {IN=> "a  \n"}, {OUT=> "a\t\t\n"}],
     ['blanks-4', qw(-t 1), {IN=> "a   \n"}, {OUT=> "a\t\t\t\n"}],
     ['blanks-5', qw(-t 1), {IN=> "a "}, {OUT=> "a "}],
     ['blanks-6', qw(-t 1), {IN=> "a  "}, {OUT=> "a\t\t"}],
     ['blanks-7', qw(-t 1), {IN=> "a   "}, {OUT=> "a\t\t\t"}],
     ['blanks-8', qw(-t 1), {IN=> " a a  a\n"}, {OUT=> "\ta a\t\ta\n"}],
     ['blanks-9', qw(-t 2), {IN=> "   a  a  a\n"}, {OUT=> "\t a\ta\t a\n"}],
     ['blanks-10', '-t', '3,4', {IN=> "0 2 4 6\t8\n"}, {OUT=> "0 2 4 6\t8\n"}],
     ['blanks-11', '-t', '3,4', {IN=> "    4\n"}, {OUT=> "\t\t4\n"}],
     ['blanks-12', '-t', '3,4', {IN=> "01  4\n"}, {OUT=> "01\t\t4\n"}],
     ['blanks-13', '-t', '3,4', {IN=> "0   4\n"}, {OUT=> "0\t\t4\n"}],

     # POSIX says spaces should only follow tabs.  Also, a single
     # trailing space is not converted to a tab when it comes before
     # a field starting with non-blanks.
     ['posix-1', '-a', {IN=> "1234567   \t1\n"}, {OUT=>"1234567\t\t1\n"}],
     ['posix-2', '-a', {IN=> "1234567  \t1\n"},  {OUT=>"1234567\t\t1\n"}],
     ['posix-3', '-a', {IN=> "1234567 \t1\n"},   {OUT=>"1234567\t\t1\n"}],
     ['posix-4', '-a', {IN=> "1234567\t1\n"},    {OUT=>"1234567\t1\n"}],
     ['posix-5', '-a', {IN=> "1234567  1\n"},    {OUT=>"1234567\t 1\n"}],
     ['posix-6', '-a', {IN=> "1234567 1\n"},     {OUT=>"1234567 1\n"}],

     # It is debatable whether this test should require an environment
     # setting of e.g., _POSIX2_VERSION=1.
     ['obs-ovflo', "-$limits->{UINTMAX_OFLOW}", {IN=>''}, {OUT=>''},
      {EXIT => 1}, {ERR => "$prog: tab stop value is too large\n"}],


     # Test input with backspaces '\b' ('bs1' is the baseline, without \b)
     # Note: If users report errors in these tests, copy&pasting results from
     # their terminal output might be confusing due to '\b' overriding
     # characters. For details see '\b' tests in 'expand.pl'.
     ['bs1', '-a -t4', {IN=>"aa  c\n"},    {OUT=>"aa\tc\n"}],
     ['bs2', '-a -t4', {IN=>"aa\b  c\n"},  {OUT=>"aa\b  c\n"}],
     ['bs3', '-a -t4', {IN=>"aa\b   c\n"}, {OUT=>"aa\b\tc\n"}],
     ['bs4', '-a -t3', {IN=>"aa\b  c\n"},  {OUT=>"aa\b\tc\n"}],

     # Undocumented feature:
     #   treat "unexpand -7"  as "unexpand --first-only --tabs 7",
     #   and   "unexpand -90" as "unexpand --first-only --tabs 90".
     ['u1', '-a -3',    {IN=>"a  b  c"}, {OUT=>"a\tb\tc"}],
     ['u2', '-a -4,9',  {IN=>"a   b    c"}, {OUT=>"a\tb\tc"}],
     ['u3', '-a -11',   {IN=>"a          b"}, {OUT=>"a\tb"}],
     # Test all digits (for full code coverage)
     ['u4', '-a -2,6',  {IN=>"a b   c"}, {OUT=>"a b\tc"}],
     ['u5', '-a -7',    {IN=>"a      b"},    {OUT=>"a\tb"}],
     ['u6', '-a -8',    {IN=>"a       b"},    {OUT=>"a\tb"}],
     # This syntax is handled internally as "-3, -9"
     ['u7', '-a -3,9',  {IN=>"a  b     c"}, {OUT=>"a\tb\tc"}],
     # Default (without -a) is --first-only:
     ['u8', '-3',  {IN=>"   a   b"}, {OUT=>"\ta   b"}],

     # Arguably this should minimize translation as is done on Solaris.
     # I.e., not modify the input.  But since the result is equivalent,
     # and to be consistent in output with older versions, we output
     # a '\t' rather than a space for the second tab position.
     # For more detailed comparison with other implementations see:
     # https://lists.gnu.org/r/coreutils/2016-06/msg00015.html
     # https://lists.gnu.org/r/coreutils/2016-07/msg00011.html
     ['ts1', '-t8,9', {IN=>"x\t \t y\n"},    {OUT=>"x\t\t\t y\n"}],
     # There is no ambiguity here. This should always be the output.
     ['ts2', '-t5,8', {IN=>"x\t \t y\n"},    {OUT=>"x\t\t y\n"}],
    );

# Both settings are taken from the environment and passed straight through
# to run_tests.
my $save_temps = $ENV{DEBUG};
my $verbose = $ENV{VERBOSE};

# run_tests is defined outside this file; its return value becomes this
# script's exit status.
my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
exit $fail;