LCOV - code coverage report
Current view: top level - src - join.c (source / functions) Hit Total Coverage
Test: coreutils.info Lines: 395 461 85.7 %
Date: 2018-01-30 Functions: 23 23 100.0 %

          Line data    Source code
       1             : /* join - join lines of two files on a common field
       2             :    Copyright (C) 91, 1995-2006, 2008 Free Software Foundation, Inc.
       3             : 
       4             :    This program is free software: you can redistribute it and/or modify
       5             :    it under the terms of the GNU General Public License as published by
       6             :    the Free Software Foundation, either version 3 of the License, or
       7             :    (at your option) any later version.
       8             : 
       9             :    This program is distributed in the hope that it will be useful,
      10             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :    GNU General Public License for more details.
      13             : 
      14             :    You should have received a copy of the GNU General Public License
      15             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.
      16             : 
      17             :    Written by Mike Haertel, mike@gnu.ai.mit.edu.  */
      18             : 
      19             : #include <config.h>
      20             : 
      21             : #include <assert.h>
      22             : #include <sys/types.h>
      23             : #include <getopt.h>
      24             : 
      25             : #include "system.h"
      26             : #include "error.h"
      27             : #include "hard-locale.h"
      28             : #include "linebuffer.h"
      29             : #include "memcasecmp.h"
      30             : #include "quote.h"
      31             : #include "stdio--.h"
      32             : #include "xmemcoll.h"
      33             : #include "xstrtol.h"
      34             : #include "argmatch.h"
      35             : 
      36             : /* The official name of this program (e.g., no `g' prefix).  */
      37             : #define PROGRAM_NAME "join"
      38             : 
      39             : #define AUTHORS "Mike Haertel"
      40             : 
      41             : #define join system_join
      42             : 
      43             : /* An element of the list identifying which fields to print for each
      44             :    output line.  */
      45             : struct outlist
      46             :   {
      47             :     /* File number: 0, 1, or 2.  0 means use the join field.
      48             :        1 means use the first file argument, 2 the second.  */
      49             :     int file;
      50             : 
      51             :     /* Field index (zero-based), specified only when FILE is 1 or 2.  */
      52             :     size_t field;
      53             : 
      54             :     struct outlist *next;
      55             :   };
      56             : 
      57             : /* A field of a line.  */
      58             : struct field
      59             :   {
      60             :     char *beg;                  /* First character in field.  */
      61             :     size_t len;                 /* The length of the field.  */
      62             :   };
      63             : 
      64             : /* A line read from an input file.  */
      65             : struct line
      66             :   {
      67             :     struct linebuffer buf;      /* The line itself.  */
      68             :     size_t nfields;             /* Number of elements in `fields'.  */
      69             :     size_t nfields_allocated;   /* Number of elements allocated for `fields'. */
      70             :     struct field *fields;
      71             :   };
      72             : 
      73             : /* One or more consecutive lines read from a file that all have the
      74             :    same join field value.  */
      75             : struct seq
      76             :   {
      77             :     size_t count;                       /* Elements used in `lines'.  */
      78             :     size_t alloc;                       /* Elements allocated in `lines'.  */
      79             :     struct line *lines;
      80             :   };
      81             : 
      82             : /* The name this program was run with.  */
      83             : char *program_name;
      84             : 
      85             : /* The previous line read from each file. */
      86             : static struct line *prevline[2];
      87             : 
      88             : /* True if the LC_COLLATE locale is hard.  */
      89             : static bool hard_LC_COLLATE;
      90             : 
      91             : /* If nonzero, print unpairable lines in file 1 or 2.  */
      92             : static bool print_unpairables_1, print_unpairables_2;
      93             : 
      94             : /* If nonzero, print pairable lines.  */
      95             : static bool print_pairables;
      96             : 
      97             : /* If nonzero, we have seen at least one unpairable line. */
      98             : static bool seen_unpairable;
      99             : 
      100             : /* Element N-1 is true once we have warned about disorder in file N. */
     101             : static bool issued_disorder_warning[2];
     102             : 
     103             : /* Empty output field filler.  */
     104             : static char const *empty_filler;
     105             : 
      106             : /* Fields to join on; SIZE_MAX means they haven't been determined yet.  */
     107             : static size_t join_field_1 = SIZE_MAX;
     108             : static size_t join_field_2 = SIZE_MAX;
     109             : 
     110             : /* List of fields to print.  */
     111             : static struct outlist outlist_head;
     112             : 
     113             : /* Last element in `outlist', where a new element can be added.  */
     114             : static struct outlist *outlist_end = &outlist_head;
     115             : 
     116             : /* Tab character separating fields.  If negative, fields are separated
     117             :    by any nonempty string of blanks, otherwise by exactly one
     118             :    tab character whose value (when cast to unsigned char) equals TAB.  */
     119             : static int tab = -1;
     120             : 
      121             : /* When to check input order: by default only once an unpairable line has been seen, else always (enabled) or never (disabled). */
     122             : static enum
     123             :   {
     124             :     CHECK_ORDER_DEFAULT,
     125             :     CHECK_ORDER_ENABLED,
     126             :     CHECK_ORDER_DISABLED
     127             :   } check_input_order;
     128             : 
     129             : enum
     130             : {
     131             :   CHECK_ORDER_OPTION = CHAR_MAX + 1,
     132             :   NOCHECK_ORDER_OPTION
     133             : };
     134             : 
     135             : 
     136             : static struct option const longopts[] =
     137             : {
     138             :   {"ignore-case", no_argument, NULL, 'i'},
     139             :   {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
     140             :   {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
     141             :   {GETOPT_HELP_OPTION_DECL},
     142             :   {GETOPT_VERSION_OPTION_DECL},
     143             :   {NULL, 0, NULL, 0}
     144             : };
     145             : 
     146             : /* Used to print non-joining lines */
     147             : static struct line uni_blank;
     148             : 
     149             : /* If nonzero, ignore case when comparing join fields.  */
     150             : static bool ignore_case;
     151             : 
     152             : void
     153          49 : usage (int status)
     154             : {
     155          49 :   if (status != EXIT_SUCCESS)
     156          47 :     fprintf (stderr, _("Try `%s --help' for more information.\n"),
     157             :              program_name);
     158             :   else
     159             :     {
     160           2 :       printf (_("\
     161             : Usage: %s [OPTION]... FILE1 FILE2\n\
     162             : "),
     163             :               program_name);
     164           2 :       fputs (_("\
     165             : For each pair of input lines with identical join fields, write a line to\n\
     166             : standard output.  The default join field is the first, delimited\n\
     167             : by whitespace.  When FILE1 or FILE2 (not both) is -, read standard input.\n\
     168             : \n\
     169             :   -a FILENUM        print unpairable lines coming from file FILENUM, where\n\
     170             :                       FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
     171             :   -e EMPTY          replace missing input fields with EMPTY\n\
     172             : "), stdout);
     173           2 :       fputs (_("\
     174             :   -i, --ignore-case  ignore differences in case when comparing fields\n\
     175             :   -j FIELD          equivalent to `-1 FIELD -2 FIELD'\n\
     176             :   -o FORMAT         obey FORMAT while constructing output line\n\
     177             :   -t CHAR           use CHAR as input and output field separator\n\
     178             : "), stdout);
     179           2 :       fputs (_("\
     180             :   -v FILENUM        like -a FILENUM, but suppress joined output lines\n\
     181             :   -1 FIELD          join on this FIELD of file 1\n\
     182             :   -2 FIELD          join on this FIELD of file 2\n\
     183             :   --check-order     check that the input is correctly sorted, even\n\
     184             :                       if all input lines are pairable\n\
     185             :   --nocheck-order   do not check that the input is correctly sorted\n\
     186             : "), stdout);
     187           2 :       fputs (HELP_OPTION_DESCRIPTION, stdout);
     188           2 :       fputs (VERSION_OPTION_DESCRIPTION, stdout);
     189           2 :       fputs (_("\
     190             : \n\
     191             : Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
     192             : else fields are separated by CHAR.  Any FIELD is a field number counted\n\
     193             : from 1.  FORMAT is one or more comma or blank separated specifications,\n\
     194             : each being `FILENUM.FIELD' or `0'.  Default FORMAT outputs the join field,\n\
     195             : the remaining fields from FILE1, the remaining fields from FILE2, all\n\
     196             : separated by CHAR.\n\
     197             : \n\
     198             : Important: FILE1 and FILE2 must be sorted on the join fields.\n\
     199             : E.g., use `sort -k 1b,1' if `join' has no options.\n\
     200             : If the input is not sorted and some lines cannot be joined, a\n\
     201             : warning message will be given.\n\
     202             : "), stdout);
     203           2 :       emit_bug_reporting_address ();
     204             :     }
     205          49 :   exit (status);
     206             : }
     207             : 
     208             : /* Record a field in LINE, with location FIELD and size LEN.  */
     209             : 
     210             : static void
     211          48 : extract_field (struct line *line, char *field, size_t len)
     212             : {
     213          48 :   if (line->nfields >= line->nfields_allocated)
     214             :     {
     215          44 :       line->fields = X2NREALLOC (line->fields, &line->nfields_allocated);
     216             :     }
     217          48 :   line->fields[line->nfields].beg = field;
     218          48 :   line->fields[line->nfields].len = len;
     219          48 :   ++(line->nfields);
     220          48 : }
     221             : 
     222             : /* Fill in the `fields' structure in LINE.  */
     223             : 
     224             : static void
     225         313 : xfields (struct line *line)
     226             : {
     227         313 :   char *ptr = line->buf.buffer;
     228         313 :   char const *lim = ptr + line->buf.length - 1;
     229             : 
     230         313 :   if (ptr == lim)
     231         267 :     return;
     232             : 
     233          46 :   if (0 <= tab)
     234             :     {
     235             :       char *sep;
     236           0 :       for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
     237           0 :         extract_field (line, ptr, sep - ptr);
     238             :     }
     239             :   else
     240             :     {
     241             :       /* Skip leading blanks before the first field.  */
     242          92 :       while (isblank (to_uchar (*ptr)))
     243           2 :         if (++ptr == lim)
     244           2 :           return;
     245             : 
     246             :       do
     247             :         {
     248             :           char *sep;
     249          48 :           for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++)
     250           2 :             continue;
     251          46 :           extract_field (line, ptr, sep - ptr);
     252          46 :           if (sep == lim)
     253          42 :             return;
     254           5 :           for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++)
     255           1 :             continue;
     256             :         }
     257           4 :       while (ptr != lim);
     258             :     }
     259             : 
     260           2 :   extract_field (line, ptr, lim - ptr);
     261             : }
     262             : 
     263             : static struct line *
     264         313 : dup_line (const struct line *old)
     265             : {
     266         313 :   struct line *newline = xmalloc (sizeof *newline);
     267             :   size_t i;
     268             : 
     269             :   /* Duplicate the buffer. */
     270         313 :   initbuffer (&newline->buf);
     271         313 :   newline->buf.buffer = xmalloc (old->buf.size);
     272         313 :   newline->buf.size = old->buf.size;
     273         313 :   memcpy (newline->buf.buffer, old->buf.buffer, old->buf.length);
     274         313 :   newline->buf.length = old->buf.length;
     275             : 
     276             :   /* Duplicate the field positions. */
     277         313 :   newline->fields = xnmalloc (old->nfields_allocated, sizeof *newline->fields);
     278         313 :   newline->nfields = old->nfields;
     279         313 :   newline->nfields_allocated = old->nfields_allocated;
     280             : 
     281         361 :   for (i = 0; i < old->nfields; i++)
     282             :     {
     283          48 :       newline->fields[i].len = old->fields[i].len;
     284          96 :       newline->fields[i].beg = newline->buf.buffer + (old->fields[i].beg
     285          48 :                                                       - old->buf.buffer);
     286             :     }
     287         313 :   return newline;
     288             : }
     289             : 
     290             : static void
     291         558 : freeline (struct line *line)
     292             : {
     293         558 :   free (line->fields);
     294         558 :   free (line->buf.buffer);
     295         558 :   line->buf.buffer = NULL;
     296         558 : }
     297             : 
     298             : /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
     299             :    >0 if it compares greater; 0 if it compares equal.
     300             :    Report an error and exit if the comparison fails.
     301             :    Use join fields JF_1 and JF_2 respectively.  */
     302             : 
     303             : static int
     304         340 : keycmp (struct line const *line1, struct line const *line2,
     305             :         size_t jf_1, size_t jf_2)
     306             : {
     307             :   /* Start of field to compare in each file.  */
     308             :   char *beg1;
     309             :   char *beg2;
     310             : 
     311             :   size_t len1;
     312             :   size_t len2;          /* Length of fields to compare.  */
     313             :   int diff;
     314             : 
     315         340 :   if (jf_1 < line1->nfields)
     316             :     {
     317          98 :       beg1 = line1->fields[jf_1].beg;
     318          98 :       len1 = line1->fields[jf_1].len;
     319             :     }
     320             :   else
     321             :     {
     322         242 :       beg1 = NULL;
     323         242 :       len1 = 0;
     324             :     }
     325             : 
     326         340 :   if (jf_2 < line2->nfields)
     327             :     {
     328          68 :       beg2 = line2->fields[jf_2].beg;
     329          68 :       len2 = line2->fields[jf_2].len;
     330             :     }
     331             :   else
     332             :     {
     333         272 :       beg2 = NULL;
     334         272 :       len2 = 0;
     335             :     }
     336             : 
     337         340 :   if (len1 == 0)
     338         242 :     return len2 == 0 ? 0 : -1;
     339          98 :   if (len2 == 0)
     340          79 :     return 1;
     341             : 
     342          19 :   if (ignore_case)
     343             :     {
     344             :       /* FIXME: ignore_case does not work with NLS (in particular,
     345             :          with multibyte chars).  */
     346           3 :       diff = memcasecmp (beg1, beg2, MIN (len1, len2));
     347             :     }
     348             :   else
     349             :     {
     350          16 :       if (hard_LC_COLLATE)
     351           0 :         return xmemcoll (beg1, len1, beg2, len2);
     352          16 :       diff = memcmp (beg1, beg2, MIN (len1, len2));
     353             :     }
     354             : 
     355          19 :   if (diff)
     356           3 :     return diff;
     357          16 :   return len1 < len2 ? -1 : len1 != len2;
     358             : }
     359             : 
     360             : /* Check that successive input lines PREV and CURRENT from input file
     361             :    WHATFILE are presented in order, unless the user may be relying on
     362             :    the GNU extension that input lines may be out of order if no input
     363             :    lines are unpairable.
     364             : 
     365             :    If the user specified --nocheck-order, the check is not made.
     366             :    If the user specified --check-order, the problem is fatal.
     367             :    Otherwise (the default), the message is simply a warning.
     368             : 
     369             :    A message is printed at most once per input file. */
     370             : 
     371             : static void
     372         245 : check_order (const struct line *prev,
     373             :              const struct line *current,
     374             :              int whatfile)
     375             : {
     376         245 :   if (check_input_order != CHECK_ORDER_DISABLED
     377         231 :       && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
     378             :     {
     379          95 :       if (!issued_disorder_warning[whatfile-1])
     380             :         {
     381          71 :           size_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
     382          71 :           if (keycmp (prev, current, join_field, join_field) > 0)
     383             :             {
     384          14 :               error ((check_input_order == CHECK_ORDER_ENABLED
     385             :                       ? EXIT_FAILURE : 0),
     386             :                      0, _("File %d is not in sorted order"), whatfile);
     387             : 
     388             :               /* If we get to here, the message was just a warning, but we
     389             :                  want only to issue it once. */
     390          14 :               issued_disorder_warning[whatfile-1] = true;
     391             :             }
     392             :         }
     393             :     }
     394         245 : }
     395             : 
      396             : /* Read a line from FP into LINE and split it into fields.  WHICH (1 or 2)
      397             :    names the input, whose previous line (if any) is order-checked against LINE.  Return true if successful.  */
     398             : 
     399             : static bool
     400         363 : get_line (FILE *fp, struct line *line, int which)
     401             : {
     402         363 :   initbuffer (&line->buf);
     403             : 
     404         363 :   if (! readlinebuffer (&line->buf, fp))
     405             :     {
     406          50 :       if (ferror (fp))
     407          25 :         error (EXIT_FAILURE, errno, _("read error"));
     408          25 :       free (line->buf.buffer);
     409          25 :       line->buf.buffer = NULL;
     410          25 :       return false;
     411             :     }
     412             : 
     413         313 :   line->nfields_allocated = 0;
     414         313 :   line->nfields = 0;
     415         313 :   line->fields = NULL;
     416         313 :   xfields (line);
     417             : 
     418         313 :   if (prevline[which - 1])
     419             :     {
     420         245 :       check_order (prevline[which - 1], line, which);
     421         245 :       freeline (prevline[which - 1]);
     422         245 :       free (prevline[which - 1]);
     423             :     }
     424         313 :   prevline[which - 1] = dup_line (line);
     425         313 :   return true;
     426             : }
     427             : 
     428             : static void
     429         132 : free_prevline (void)
     430             : {
     431             :   size_t i;
     432             : 
     433         396 :   for (i = 0; i < ARRAY_CARDINALITY (prevline); i++)
     434             :     {
     435         264 :       if (prevline[i])
     436          68 :         freeline (prevline[i]);
     437         264 :       free (prevline[i]);
     438         264 :       prevline[i] = NULL;
     439             :     }
     440         132 : }
     441             : 
     442             : static void
     443          82 : initseq (struct seq *seq)
     444             : {
     445          82 :   seq->count = 0;
     446          82 :   seq->alloc = 0;
     447          82 :   seq->lines = NULL;
     448          82 : }
     449             : 
     450             : /* Read a line from FP and add it to SEQ.  Return true if successful.  */
     451             : 
     452             : static bool
     453         342 : getseq (FILE *fp, struct seq *seq, int whichfile)
     454             : {
     455         342 :   if (seq->count == seq->alloc)
     456         188 :     seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
     457             : 
     458         342 :   if (get_line (fp, &seq->lines[seq->count], whichfile))
     459             :     {
     460         296 :       ++seq->count;
     461         296 :       return true;
     462             :     }
     463          23 :   return false;
     464             : }
     465             : 
     466             : /* Read a line from FP and add it to SEQ, as the first item if FIRST is
     467             :    true, else as the next.  */
     468             : static bool
     469         260 : advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
     470             : {
     471         260 :   if (first)
     472             :     {
     473          80 :       freeline (&seq->lines[0]);
     474          80 :       seq->count = 0;
     475             :     }
     476         260 :   return getseq (fp, seq, whichfile);
     477             : }
     478             : 
     479             : static void
     480          32 : delseq (struct seq *seq)
     481             : {
     482             :   size_t i;
     483          44 :   for (i = 0; i < seq->count; i++)
     484          12 :     if (seq->lines[i].buf.buffer)
     485           2 :       freeline (&seq->lines[i]);
     486          32 :   free (seq->lines);
     487          32 : }
     488             : 
     489             : 
      490             : /* Print field N of LINE if it exists and is nonempty, otherwise
      491             :    `empty_filler' if it is non-null.  */
     492             : 
     493             : static void
     494         258 : prfield (size_t n, struct line const *line)
     495             : {
     496             :   size_t len;
     497             : 
     498         258 :   if (n < line->nfields)
     499             :     {
     500          13 :       len = line->fields[n].len;
     501          13 :       if (len)
     502          13 :         fwrite (line->fields[n].beg, 1, len, stdout);
     503           0 :       else if (empty_filler)
     504           0 :         fputs (empty_filler, stdout);
     505             :     }
     506         245 :   else if (empty_filler)
     507           0 :     fputs (empty_filler, stdout);
     508         258 : }
     509             : 
     510             : /* Print the join of LINE1 and LINE2.  */
     511             : 
     512             : static void
     513         258 : prjoin (struct line const *line1, struct line const *line2)
     514             : {
     515             :   const struct outlist *outlist;
     516         258 :   char output_separator = tab < 0 ? ' ' : tab;
     517             : 
     518         258 :   outlist = outlist_head.next;
     519         258 :   if (outlist)
     520             :     {
     521             :       const struct outlist *o;
     522             : 
     523           0 :       o = outlist;
     524             :       while (1)
     525           0 :         {
     526             :           size_t field;
     527             :           struct line const *line;
     528             : 
     529           0 :           if (o->file == 0)
     530             :             {
     531           0 :               if (line1 == &uni_blank)
     532             :                 {
     533           0 :                   line = line2;
     534           0 :                   field = join_field_2;
     535             :                 }
     536             :               else
     537             :                 {
     538           0 :                   line = line1;
     539           0 :                   field = join_field_1;
     540             :                 }
     541             :             }
     542             :           else
     543             :             {
     544           0 :               line = (o->file == 1 ? line1 : line2);
     545           0 :               field = o->field;
     546             :             }
     547           0 :           prfield (field, line);
     548           0 :           o = o->next;
     549           0 :           if (o == NULL)
     550           0 :             break;
     551           0 :           putchar (output_separator);
     552             :         }
     553           0 :       putchar ('\n');
     554             :     }
     555             :   else
     556             :     {
     557             :       size_t i;
     558             : 
     559         258 :       if (line1 == &uni_blank)
     560             :         {
     561             :           struct line const *t;
     562           0 :           t = line1;
     563           0 :           line1 = line2;
     564           0 :           line2 = t;
     565             :         }
     566         258 :       prfield (join_field_1, line1);
     567         258 :       for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
     568             :         {
     569           0 :           putchar (output_separator);
     570           0 :           prfield (i, line1);
     571             :         }
     572         258 :       for (i = join_field_1 + 1; i < line1->nfields; ++i)
     573             :         {
     574           0 :           putchar (output_separator);
     575           0 :           prfield (i, line1);
     576             :         }
     577             : 
     578         258 :       for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
     579             :         {
     580           0 :           putchar (output_separator);
     581           0 :           prfield (i, line2);
     582             :         }
     583         258 :       for (i = join_field_2 + 1; i < line2->nfields; ++i)
     584             :         {
     585           0 :           putchar (output_separator);
     586           0 :           prfield (i, line2);
     587             :         }
     588         258 :       putchar ('\n');
     589             :     }
     590         258 : }
     591             : 
     592             : /* Print the join of the files in FP1 and FP2.  */
     593             : 
     594             : static void
     595          41 : join (FILE *fp1, FILE *fp2)
     596             : {
     597             :   struct seq seq1, seq2;
     598             :   struct line line;
     599             :   int diff;
     600             :   bool eof1, eof2, checktail;
     601             : 
     602             :   /* Read the first line of each file.  */
     603          41 :   initseq (&seq1);
     604          41 :   getseq (fp1, &seq1, 1);
     605          41 :   initseq (&seq2);
     606          41 :   getseq (fp2, &seq2, 2);
     607             : 
     608         157 :   while (seq1.count && seq2.count)
     609             :     {
     610             :       size_t i;
     611         112 :       diff = keycmp (&seq1.lines[0], &seq2.lines[0],
     612             :                      join_field_1, join_field_2);
     613         112 :       if (diff < 0)
     614             :         {
     615          32 :           if (print_unpairables_1)
     616           0 :             prjoin (&seq1.lines[0], &uni_blank);
     617          32 :           advance_seq (fp1, &seq1, true, 1);
     618          30 :           seen_unpairable = true;
     619          30 :           continue;
     620             :         }
     621          80 :       if (diff > 0)
     622             :         {
     623          48 :           if (print_unpairables_2)
     624           0 :             prjoin (&uni_blank, &seq2.lines[0]);
     625          48 :           advance_seq (fp2, &seq2, true, 2);
     626          48 :           seen_unpairable = true;
     627          48 :           continue;
     628             :         }
     629             : 
     630             :       /* Keep reading lines from file1 as long as they continue to
     631             :          match the current line from file2.  */
     632          32 :       eof1 = false;
     633             :       do
     634         111 :         if (!advance_seq (fp1, &seq1, false, 1))
     635             :           {
     636           8 :             eof1 = true;
     637           8 :             ++seq1.count;
     638           8 :             break;
     639             :           }
     640          97 :       while (!keycmp (&seq1.lines[seq1.count - 1], &seq2.lines[0],
     641          97 :                       join_field_1, join_field_2));
     642             : 
     643             :       /* Keep reading lines from file2 as long as they continue to
     644             :          match the current line from file1.  */
     645          26 :       eof2 = false;
     646             :       do
     647          69 :         if (!advance_seq (fp2, &seq2, false, 2))
     648             :           {
     649           8 :             eof2 = true;
     650           8 :             ++seq2.count;
     651           8 :             break;
     652             :           }
     653          60 :       while (!keycmp (&seq1.lines[0], &seq2.lines[seq2.count - 1],
     654          60 :                       join_field_1, join_field_2));
     655             : 
     656          25 :       if (print_pairables)
     657             :         {
     658          91 :           for (i = 0; i < seq1.count - 1; ++i)
     659             :             {
     660             :               size_t j;
     661         324 :               for (j = 0; j < seq2.count - 1; ++j)
     662         258 :                 prjoin (&seq1.lines[i], &seq2.lines[j]);
     663             :             }
     664             :         }
     665             : 
     666          91 :       for (i = 0; i < seq1.count - 1; ++i)
     667          66 :         freeline (&seq1.lines[i]);
     668          25 :       if (!eof1)
     669             :         {
     670          18 :           seq1.lines[0] = seq1.lines[seq1.count - 1];
     671          18 :           seq1.count = 1;
     672             :         }
     673             :       else
     674           7 :         seq1.count = 0;
     675             : 
     676          93 :       for (i = 0; i < seq2.count - 1; ++i)
     677          68 :         freeline (&seq2.lines[i]);
     678          25 :       if (!eof2)
     679             :         {
     680          17 :           seq2.lines[0] = seq2.lines[seq2.count - 1];
     681          17 :           seq2.count = 1;
     682             :         }
     683             :       else
     684           8 :         seq2.count = 0;
     685             :     }
     686             : 
     687             :   /* If the user did not specify --nocheck-order, then we read the
     688             :      tail ends of both inputs to verify that they are in order.  We
     689             :      skip the rest of the tail once we have issued a warning for that
     690             :      file, unless we actually need to print the unpairable lines.  */
     691          18 :   if (check_input_order != CHECK_ORDER_DISABLED
     692          18 :       && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
     693          16 :     checktail = true;
     694             :   else
     695           2 :     checktail = false;
     696             : 
     697          18 :   if ((print_unpairables_1 || checktail) && seq1.count)
     698             :     {
     699           8 :       if (print_unpairables_1)
     700           0 :         prjoin (&seq1.lines[0], &uni_blank);
     701           8 :       freeline (&seq1.lines[0]);
     702           8 :       seen_unpairable = true;
     703          21 :       while (get_line (fp1, &line, 1))
     704             :         {
     705          11 :           if (print_unpairables_1)
     706           0 :             prjoin (&line, &uni_blank);
     707          11 :           freeline (&line);
     708          11 :           if (issued_disorder_warning[0] && !print_unpairables_1)
     709           6 :             break;
     710             :         }
     711             :     }
     712             : 
     713          16 :   if ((print_unpairables_2 || checktail) && seq2.count)
     714             :     {
     715           4 :       if (print_unpairables_2)
     716           0 :         prjoin (&uni_blank, &seq2.lines[0]);
     717           4 :       freeline (&seq2.lines[0]);
     718           4 :       seen_unpairable = true;
     719          12 :       while (get_line (fp2, &line, 2))
     720             :         {
     721           6 :           if (print_unpairables_2)
     722           0 :             prjoin (&uni_blank, &line);
     723           6 :           freeline (&line);
     724           6 :           if (issued_disorder_warning[1] && !print_unpairables_2)
     725           2 :             break;
     726             :         }
     727             :     }
     728             : 
     729          16 :   delseq (&seq1);
     730          16 :   delseq (&seq2);
     731          16 : }
     732             : 
     733             : /* Add a field spec for field FIELD of file FILE to `outlist'.  */
     734             : 
     735             : static void
     736           2 : add_field (int file, size_t field)
     737             : {
     738             :   struct outlist *o;
     739             : 
     740           2 :   assert (file == 0 || file == 1 || file == 2);
     741           2 :   assert (file != 0 || field == 0);
     742             : 
     743           2 :   o = xmalloc (sizeof *o);
     744           2 :   o->file = file;
     745           2 :   o->field = field;
     746           2 :   o->next = NULL;
     747             : 
     748             :   /* Add to the end of the list so the fields are in the right order.  */
     749           2 :   outlist_end->next = o;
     750           2 :   outlist_end = o;
     751           2 : }
     752             : 
     753             : /* Convert a string of decimal digits, STR (the 1-based join field number),
     754             :    to an integral value.  Upon successful conversion, return one less
     755             :    (the zero-based field number).  Silently convert too-large values
     756             :    to SIZE_MAX - 1.  Otherwise, if a value cannot be converted, give a
     757             :    diagnostic and exit.  */
     758             : 
     759             : static size_t
     760          11 : string_to_join_field (char const *str)
     761             : {
     762             :   size_t result;
     763             :   unsigned long int val;
     764             :   verify (SIZE_MAX <= ULONG_MAX);
     765             : 
     766          11 :   strtol_error s_err = xstrtoul (str, NULL, 10, &val, "");
     767          11 :   if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val))
     768           0 :     val = SIZE_MAX;
     769          11 :   else if (s_err != LONGINT_OK || val == 0)
     770           8 :     error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
     771             : 
     772           3 :   result = val - 1;
     773             : 
     774           3 :   return result;
     775             : }
     776             : 
     777             : /* Convert a single field specifier string, S, to a *FILE_INDEX, *FIELD_INDEX
     778             :    pair.  In S, the field index string is 1-based; *FIELD_INDEX is zero-based.
     779             :    If S is valid, store both indices and return.  Otherwise, give a diagnostic and exit.  */
     780             : 
     781             : static void
     782          10 : decode_field_spec (const char *s, int *file_index, size_t *field_index)
     783             : {
     784             :   /* The first character must be 0, 1, or 2.  */
     785          10 :   switch (s[0])
     786             :     {
     787           3 :     case '0':
     788           3 :       if (s[1])
     789             :         {
     790             :           /* `0' must be all alone -- no `.FIELD'.  */
     791           1 :           error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
     792             :         }
     793           2 :       *file_index = 0;
     794           2 :       *field_index = 0;
     795           2 :       break;
     796             : 
     797           3 :     case '1':
     798             :     case '2':
     799           3 :       if (s[1] != '.')
     800           2 :         error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
     801           1 :       *file_index = s[0] - '0';
     802           1 :       *field_index = string_to_join_field (s + 2);
     803           0 :       break;
     804             : 
     805           4 :     default:
     806           4 :       error (EXIT_FAILURE, 0,
     807             :              _("invalid file number in field spec: %s"), quote (s));
     808             : 
     809             :       /* Tell gcc -W -Wall that we can't get beyond this point.
     810             :          This avoids a warning (otherwise legit) that the caller's copies
     811             :          of *file_index and *field_index might be used uninitialized.  */
     812           0 :       abort ();
     813             : 
     814             :       break;
     815             :     }
     816           2 : }
     817             : 
     818             : /* Add the comma or blank separated field spec(s) in STR to `outlist'.  */
     819             : 
     820             : static void
     821           9 : add_field_list (char *str)
     822             : {
     823           9 :   char *p = str;
     824             : 
     825             :   do
     826             :     {
     827             :       int file_index;
     828             :       size_t field_index;
     829          10 :       char const *spec_item = p;
     830             : 
     831          10 :       p = strpbrk (p, ", \t");
     832          10 :       if (p)
     833           4 :         *p++ = '\0';
     834          10 :       decode_field_spec (spec_item, &file_index, &field_index);
     835           2 :       add_field (file_index, field_index);
     836             :     }
     837           2 :   while (p);
     838           1 : }
     839             : 
     840             : /* Set the join field *VAR to VAL, but report an error if *VAR is set
     841             :    more than once to incompatible values.  */
     842             : 
     843             : static void
     844           8 : set_join_field (size_t *var, size_t val)
     845             : {
     846           8 :   if (*var != SIZE_MAX && *var != val)
     847             :     {
     848           0 :       unsigned long int var1 = *var + 1;
     849           0 :       unsigned long int val1 = val + 1;
     850           0 :       error (EXIT_FAILURE, 0, _("incompatible join fields %lu, %lu"),
     851             :              var1, val1);
     852             :     }
     853           8 :   *var = val;
     854           8 : }
     855             : 
     856             : /* Status of command-line arguments.  */
     857             : 
     858             : enum operand_status
     859             :   {
     860             :     /* This argument must be an operand, i.e., one of the files to be
     861             :        joined.  */
     862             :     MUST_BE_OPERAND,
     863             : 
     864             :     /* This might be the argument of the preceding -j1 or -j2 option,
     865             :        or it might be an operand.  */
     866             :     MIGHT_BE_J1_ARG,
     867             :     MIGHT_BE_J2_ARG,
     868             : 
     869             :     /* This might be the argument of the preceding -o option, or it might be
     870             :        an operand.  */
     871             :     MIGHT_BE_O_ARG
     872             :   };
     873             : 
     874             : /* Add NAME to the array of input file NAMES with operand statuses
     875             :    OPERAND_STATUS; currently there are NFILES names in the list.  */
     876             : 
     877             : static void
     878         132 : add_file_name (char *name, char *names[2],
     879             :                int operand_status[2], int joption_count[2], int *nfiles,
     880             :                int *prev_optc_status, int *optc_status)
     881             : {
     882         132 :   int n = *nfiles;
     883             : 
     884         132 :   if (n == 2)
     885             :     {
     886           1 :       bool op0 = (operand_status[0] == MUST_BE_OPERAND);
     887           1 :       char *arg = names[op0];
     888           1 :       switch (operand_status[op0])
     889             :         {
     890           1 :         case MUST_BE_OPERAND:
     891           1 :           error (0, 0, _("extra operand %s"), quote (name));
     892           1 :           usage (EXIT_FAILURE);
     893             : 
     894           0 :         case MIGHT_BE_J1_ARG:
     895           0 :           joption_count[0]--;
     896           0 :           set_join_field (&join_field_1, string_to_join_field (arg));
     897           0 :           break;
     898             : 
     899           0 :         case MIGHT_BE_J2_ARG:
     900           0 :           joption_count[1]--;
     901           0 :           set_join_field (&join_field_2, string_to_join_field (arg));
     902           0 :           break;
     903             : 
     904           0 :         case MIGHT_BE_O_ARG:
     905           0 :           add_field_list (arg);
     906           0 :           break;
     907             :         }
     908           0 :       if (!op0)
     909             :         {
     910           0 :           operand_status[0] = operand_status[1];
     911           0 :           names[0] = names[1];
     912             :         }
     913           0 :       n = 1;
     914             :     }
     915             : 
     916         131 :   operand_status[n] = *prev_optc_status;
     917         131 :   names[n] = name;
     918         131 :   *nfiles = n + 1;
     919         131 :   if (*prev_optc_status == MIGHT_BE_O_ARG)
     920           0 :     *optc_status = MIGHT_BE_O_ARG;
     921         131 : }
     922             : 
     923             : int
     924         132 : main (int argc, char **argv)
     925             : {
     926             :   int optc_status;
     927         132 :   int prev_optc_status = MUST_BE_OPERAND;
     928             :   int operand_status[2];
     929         132 :   int joption_count[2] = { 0, 0 };
     930             :   char *names[2];
     931             :   FILE *fp1, *fp2;
     932             :   int optc;
     933         132 :   int nfiles = 0;
     934             :   int i;
     935             : 
     936             :   initialize_main (&argc, &argv);
     937         132 :   program_name = argv[0];
     938         132 :   setlocale (LC_ALL, "");
     939             :   bindtextdomain (PACKAGE, LOCALEDIR);
     940             :   textdomain (PACKAGE);
     941         132 :   hard_LC_COLLATE = hard_locale (LC_COLLATE);
     942             : 
     943         132 :   atexit (close_stdout);
     944         132 :   atexit (free_prevline);
     945             : 
     946         132 :   print_pairables = true;
     947         132 :   seen_unpairable = false;
     948         132 :   issued_disorder_warning[0] = issued_disorder_warning[1] = false;
     949         132 :   check_input_order = CHECK_ORDER_DEFAULT;
     950             : 
     951         381 :   while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:",
     952             :                               longopts, NULL))
     953             :          != -1)
     954             :     {
     955         159 :       optc_status = MUST_BE_OPERAND;
     956             : 
     957         159 :       switch (optc)
     958             :         {
     959           1 :         case 'v':
     960           1 :             print_pairables = false;
     961             :             /* Fall through.  */
     962             : 
     963          15 :         case 'a':
     964             :           {
     965             :             unsigned long int val;
     966          15 :             if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
     967           5 :                 || (val != 1 && val != 2))
     968          12 :               error (EXIT_FAILURE, 0,
     969             :                      _("invalid field number: %s"), quote (optarg));
     970           3 :             if (val == 1)
     971           2 :               print_unpairables_1 = true;
     972             :             else
     973           1 :               print_unpairables_2 = true;
     974             :           }
     975           3 :           break;
     976             : 
     977           6 :         case 'e':
     978           6 :           if (empty_filler && ! STREQ (empty_filler, optarg))
     979           1 :             error (EXIT_FAILURE, 0,
     980             :                    _("conflicting empty-field replacement strings"));
     981           5 :           empty_filler = optarg;
     982           5 :           break;
     983             : 
     984           8 :         case 'i':
     985           8 :           ignore_case = true;
     986           8 :           break;
     987             : 
     988           3 :         case '1':
     989           3 :           set_join_field (&join_field_1, string_to_join_field (optarg));
     990           0 :           break;
     991             : 
     992           3 :         case '2':
     993           3 :           set_join_field (&join_field_2, string_to_join_field (optarg));
     994           0 :           break;
     995             : 
     996           6 :         case 'j':
     997           6 :           if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
     998           4 :               && optarg == argv[optind - 1] + 2)
     999           2 :             {
    1000             :               /* The argument was either "-j1" or "-j2".  */
    1001           2 :               bool is_j2 = (optarg[0] == '2');
    1002           2 :               joption_count[is_j2]++;
    1003           2 :               optc_status = MIGHT_BE_J1_ARG + is_j2;
    1004             :             }
    1005             :           else
    1006             :             {
    1007           4 :               set_join_field (&join_field_1, string_to_join_field (optarg));
    1008           3 :               set_join_field (&join_field_2, join_field_1);
    1009             :             }
    1010           5 :           break;
    1011             : 
    1012           9 :         case 'o':
    1013           9 :           add_field_list (optarg);
    1014           1 :           optc_status = MIGHT_BE_O_ARG;
    1015           1 :           break;
    1016             : 
    1017           4 :         case 't':
    1018             :           {
    1019           4 :             unsigned char newtab = optarg[0];
    1020           4 :             if (! newtab)
    1021           1 :               error (EXIT_FAILURE, 0, _("empty tab"));
    1022           3 :             if (optarg[1])
    1023             :               {
    1024           2 :                 if (STREQ (optarg, "\\0"))
    1025           1 :                   newtab = '\0';
    1026             :                 else
    1027           1 :                   error (EXIT_FAILURE, 0, _("multi-character tab %s"),
    1028             :                          quote (optarg));
    1029             :               }
    1030           2 :             if (0 <= tab && tab != newtab)
    1031           0 :               error (EXIT_FAILURE, 0, _("incompatible tabs"));
    1032           2 :             tab = newtab;
    1033             :           }
    1034           2 :           break;
    1035             : 
    1036           3 :         case NOCHECK_ORDER_OPTION:
    1037           3 :           check_input_order = CHECK_ORDER_DISABLED;
    1038           3 :           break;
    1039             : 
    1040           2 :         case CHECK_ORDER_OPTION:
    1041           2 :           check_input_order = CHECK_ORDER_ENABLED;
    1042           2 :           break;
    1043             : 
    1044          89 :         case 1:         /* Non-option argument.  */
    1045          89 :           add_file_name (optarg, names, operand_status, joption_count,
    1046             :                          &nfiles, &prev_optc_status, &optc_status);
    1047          88 :           break;
    1048             : 
    1049           2 :         case_GETOPT_HELP_CHAR;
    1050             : 
    1051           1 :         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
    1052             : 
    1053           8 :         default:
    1054           8 :           usage (EXIT_FAILURE);
    1055             :         }
    1056             : 
    1057         117 :       prev_optc_status = optc_status;
    1058             :     }
    1059             : 
    1060             :   /* Process any operands after "--".  */
    1061          90 :   prev_optc_status = MUST_BE_OPERAND;
    1062         223 :   while (optind < argc)
    1063          43 :     add_file_name (argv[optind++], names, operand_status, joption_count,
    1064             :                    &nfiles, &prev_optc_status, &optc_status);
    1065             : 
    1066          90 :   if (nfiles != 2)
    1067             :     {
    1068          38 :       if (nfiles == 0)
    1069          16 :         error (0, 0, _("missing operand"));
    1070             :       else
    1071          22 :         error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
    1072          38 :       usage (EXIT_FAILURE);
    1073             :     }
    1074             : 
    1075             :   /* If "-j1" was specified and it turns out not to have had an argument,
    1076             :      treat it as "-j 1".  Likewise for -j2.  */
    1077         156 :   for (i = 0; i < 2; i++)
    1078         104 :     if (joption_count[i] != 0)
    1079             :       {
    1080           1 :         set_join_field (&join_field_1, i);
    1081           1 :         set_join_field (&join_field_2, i);
    1082             :       }
    1083             : 
    1084          52 :   if (join_field_1 == SIZE_MAX)
    1085          51 :     join_field_1 = 0;
    1086          52 :   if (join_field_2 == SIZE_MAX)
    1087          51 :     join_field_2 = 0;
    1088             : 
    1089          52 :   fp1 = STREQ (names[0], "-") ? stdin : fopen (names[0], "r");
    1090          52 :   if (!fp1)
    1091           4 :     error (EXIT_FAILURE, errno, "%s", names[0]);
    1092          48 :   fp2 = STREQ (names[1], "-") ? stdin : fopen (names[1], "r");
    1093          48 :   if (!fp2)
    1094           6 :     error (EXIT_FAILURE, errno, "%s", names[1]);
    1095          42 :   if (fp1 == fp2)
    1096           1 :     error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
    1097          41 :   join (fp1, fp2);
    1098             : 
    1099          16 :   if (fclose (fp1) != 0)
    1100           0 :     error (EXIT_FAILURE, errno, "%s", names[0]);
    1101          16 :   if (fclose (fp2) != 0)
    1102           0 :     error (EXIT_FAILURE, errno, "%s", names[1]);
    1103             : 
    1104          16 :   if (issued_disorder_warning[0] || issued_disorder_warning[1])
    1105          10 :     exit (EXIT_FAILURE);
    1106             :   else
    1107           6 :     exit (EXIT_SUCCESS);
    1108             : }

Generated by: LCOV version 1.10