Line data Source code
1 : /* comm -- compare two sorted files line by line.
2 : Copyright (C) 86, 90, 91, 1995-2005 Free Software Foundation, Inc.
3 :
4 : This program is free software: you can redistribute it and/or modify
5 : it under the terms of the GNU General Public License as published by
6 : the Free Software Foundation, either version 3 of the License, or
7 : (at your option) any later version.
8 :
9 : This program is distributed in the hope that it will be useful,
10 : but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : GNU General Public License for more details.
13 :
14 : You should have received a copy of the GNU General Public License
15 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
16 :
17 : /* Written by Richard Stallman and David MacKenzie. */
18 :
19 : #include <config.h>
20 :
21 : #include <getopt.h>
22 : #include <sys/types.h>
23 : #include "system.h"
24 : #include "linebuffer.h"
25 : #include "error.h"
26 : #include "hard-locale.h"
27 : #include "quote.h"
28 : #include "stdio--.h"
29 : #include "xmemcoll.h"
30 :
31 : /* The official name of this program (e.g., no `g' prefix). */
32 : #define PROGRAM_NAME "comm"
33 :
34 : #define AUTHORS "Richard Stallman", "David MacKenzie"
35 :
36 : /* Undefine, to avoid warning about redefinition on some systems. */
37 : #undef min
38 : #define min(x, y) ((x) < (y) ? (x) : (y))
39 :
40 : /* The name this program was run with. */
41 : char *program_name;
42 :
43 : /* True if the LC_COLLATE locale is hard. */
44 : static bool hard_LC_COLLATE;
45 :
46 : /* If true, print lines that are found only in file 1. */
47 : static bool only_file_1;
48 :
49 : /* If true, print lines that are found only in file 2. */
50 : static bool only_file_2;
51 :
52 : /* If true, print lines that are found in both files. */
53 : static bool both;
54 :
55 : static struct option const long_options[] =
56 : {
57 : {GETOPT_HELP_OPTION_DECL},
58 : {GETOPT_VERSION_OPTION_DECL},
59 : {NULL, 0, NULL, 0}
60 : };
61 :
62 :
63 :
64 : void
65 28 : usage (int status)
66 : {
67 28 : if (status != EXIT_SUCCESS)
68 26 : fprintf (stderr, _("Try `%s --help' for more information.\n"),
69 : program_name);
70 : else
71 : {
72 2 : printf (_("\
73 : Usage: %s [OPTION]... FILE1 FILE2\n\
74 : "),
75 : program_name);
76 2 : fputs (_("\
77 : Compare sorted files FILE1 and FILE2 line by line.\n\
78 : "), stdout);
79 2 : fputs (_("\
80 : \n\
81 : With no options, produce three-column output. Column one contains\n\
82 : lines unique to FILE1, column two contains lines unique to FILE2,\n\
83 : and column three contains lines common to both files.\n\
84 : "), stdout);
85 2 : fputs (_("\
86 : \n\
87 : -1 suppress lines unique to FILE1\n\
88 : -2 suppress lines unique to FILE2\n\
89 : -3 suppress lines that appear in both files\n\
90 : "), stdout);
91 2 : fputs (HELP_OPTION_DESCRIPTION, stdout);
92 2 : fputs (VERSION_OPTION_DESCRIPTION, stdout);
93 2 : emit_bug_reporting_address ();
94 : }
95 28 : exit (status);
96 : }
97 :
98 : /* Output the line in linebuffer LINE to stream STREAM
99 : provided the switches say it should be output.
100 : CLASS is 1 for a line found only in file 1,
101 : 2 for a line only in file 2, 3 for a line in both. */
102 :
103 : static void
104 108 : writeline (const struct linebuffer *line, FILE *stream, int class)
105 : {
106 108 : switch (class)
107 : {
108 25 : case 1:
109 25 : if (!only_file_1)
110 4 : return;
111 21 : break;
112 :
113 17 : case 2:
114 17 : if (!only_file_2)
115 1 : return;
116 : /* Print a TAB if we are printing lines from file 1. */
117 16 : if (only_file_1)
118 15 : putc ('\t', stream);
119 16 : break;
120 :
121 66 : case 3:
122 66 : if (!both)
123 4 : return;
124 : /* Print a TAB if we are printing lines from file 1. */
125 62 : if (only_file_1)
126 57 : putc ('\t', stream);
127 : /* Print a TAB if we are printing lines from file 2. */
128 62 : if (only_file_2)
129 58 : putc ('\t', stream);
130 62 : break;
131 : }
132 :
133 99 : fwrite (line->buffer, sizeof (char), line->length, stream);
134 : }
135 :
136 : /* Compare INFILES[0] and INFILES[1].
137 : If either is "-", use the standard input for that file.
138 : Assume that each input file is sorted;
139 : merge them and output the result. */
140 :
141 : static void
142 28 : compare_files (char **infiles)
143 : {
144 : /* For each file, we have one linebuffer in lb1. */
145 : struct linebuffer lb1[2];
146 :
147 : /* thisline[i] points to the linebuffer holding the next available line
148 : in file i, or is NULL if there are no lines left in that file. */
149 : struct linebuffer *thisline[2];
150 :
151 : /* streams[i] holds the input stream for file i. */
152 : FILE *streams[2];
153 :
154 : int i;
155 :
156 : /* Initialize the storage. */
157 70 : for (i = 0; i < 2; i++)
158 : {
159 51 : initbuffer (&lb1[i]);
160 51 : thisline[i] = &lb1[i];
161 51 : streams[i] = (STREQ (infiles[i], "-") ? stdin : fopen (infiles[i], "r"));
162 51 : if (!streams[i])
163 4 : error (EXIT_FAILURE, errno, "%s", infiles[i]);
164 :
165 47 : thisline[i] = readlinebuffer (thisline[i], streams[i]);
166 47 : if (ferror (streams[i]))
167 5 : error (EXIT_FAILURE, errno, "%s", infiles[i]);
168 : }
169 :
170 144 : while (thisline[0] || thisline[1])
171 : {
172 : int order;
173 :
174 : /* Compare the next available lines of the two files. */
175 :
176 108 : if (!thisline[0])
177 4 : order = 1;
178 104 : else if (!thisline[1])
179 3 : order = -1;
180 : else
181 : {
182 101 : if (hard_LC_COLLATE)
183 0 : order = xmemcoll (thisline[0]->buffer, thisline[0]->length - 1,
184 0 : thisline[1]->buffer, thisline[1]->length - 1);
185 : else
186 : {
187 101 : size_t len = min (thisline[0]->length, thisline[1]->length) - 1;
188 101 : order = memcmp (thisline[0]->buffer, thisline[1]->buffer, len);
189 101 : if (order == 0)
190 198 : order = (thisline[0]->length < thisline[1]->length
191 : ? -1
192 99 : : thisline[0]->length != thisline[1]->length);
193 : }
194 : }
195 :
196 : /* Output the line that is lesser. */
197 108 : if (order == 0)
198 66 : writeline (thisline[1], stdout, 3);
199 42 : else if (order > 0)
200 17 : writeline (thisline[1], stdout, 2);
201 : else
202 25 : writeline (thisline[0], stdout, 1);
203 :
204 : /* Step the file the line came from.
205 : If the files match, step both files. */
206 108 : if (order >= 0)
207 : {
208 83 : thisline[1] = readlinebuffer (thisline[1], streams[1]);
209 83 : if (ferror (streams[1]))
210 1 : error (EXIT_FAILURE, errno, "%s", infiles[1]);
211 : }
212 107 : if (order <= 0)
213 : {
214 90 : thisline[0] = readlinebuffer (thisline[0], streams[0]);
215 90 : if (ferror (streams[0]))
216 1 : error (EXIT_FAILURE, errno, "%s", infiles[0]);
217 : }
218 : }
219 :
220 36 : for (i = 0; i < 2; i++)
221 34 : if (fclose (streams[i]) != 0)
222 15 : error (EXIT_FAILURE, errno, "%s", infiles[i]);
223 2 : }
224 :
225 : int
226 57 : main (int argc, char **argv)
227 : {
228 : int c;
229 :
230 : initialize_main (&argc, &argv);
231 57 : program_name = argv[0];
232 57 : setlocale (LC_ALL, "");
233 : bindtextdomain (PACKAGE, LOCALEDIR);
234 : textdomain (PACKAGE);
235 57 : hard_LC_COLLATE = hard_locale (LC_COLLATE);
236 :
237 57 : atexit (close_stdout);
238 :
239 57 : only_file_1 = true;
240 57 : only_file_2 = true;
241 57 : both = true;
242 :
243 124 : while ((c = getopt_long (argc, argv, "123", long_options, NULL)) != -1)
244 17 : switch (c)
245 : {
246 5 : case '1':
247 5 : only_file_1 = false;
248 5 : break;
249 :
250 3 : case '2':
251 3 : only_file_2 = false;
252 3 : break;
253 :
254 2 : case '3':
255 2 : both = false;
256 2 : break;
257 :
258 2 : case_GETOPT_HELP_CHAR;
259 :
260 1 : case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
261 :
262 4 : default:
263 4 : usage (EXIT_FAILURE);
264 : }
265 :
266 50 : if (argc - optind < 2)
267 : {
268 20 : if (argc <= optind)
269 6 : error (0, 0, _("missing operand"));
270 : else
271 14 : error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
272 20 : usage (EXIT_FAILURE);
273 : }
274 :
275 30 : if (2 < argc - optind)
276 : {
277 2 : error (0, 0, _("extra operand %s"), quote (argv[optind + 2]));
278 2 : usage (EXIT_FAILURE);
279 : }
280 :
281 28 : compare_files (argv + optind);
282 :
283 2 : exit (EXIT_SUCCESS);
284 : }
|