Line data Source code
1 : /* readtokens.c -- Functions for reading tokens from an input stream.
2 :
3 : Copyright (C) 1990-1991, 1999-2004, 2006 Free Software Foundation, Inc.
4 :
5 : This program is free software: you can redistribute it and/or modify
6 : it under the terms of the GNU General Public License as published by
7 : the Free Software Foundation; either version 3 of the License, or
8 : (at your option) any later version.
9 :
10 : This program is distributed in the hope that it will be useful,
11 : but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 : GNU General Public License for more details.
14 :
15 : You should have received a copy of the GNU General Public License
16 : along with this program. If not, see <http://www.gnu.org/licenses/>.
17 :
18 : Written by Jim Meyering. */
19 :
20 : /* This almost supersedes xreadline stuff -- using delim="\n"
21 : gives the same functionality, except that these functions
22 : would never return empty lines. */
23 :
24 : #include <config.h>
25 :
26 : #include "readtokens.h"
27 :
28 : #include <stdio.h>
29 : #include <stdlib.h>
30 : #include <string.h>
31 : #include <stdbool.h>
32 :
33 : #include "xalloc.h"
34 :
35 : #if USE_UNLOCKED_IO
36 : # include "unlocked-io.h"
37 : #endif
38 :
39 : #define STREQ(a,b) ((a) == (b) || ((a) && (b) && *(a) == *(b) \
40 : && strcmp(a, b) == 0))
41 :
42 : /* Initialize a tokenbuffer. */
43 :
44 : void
45 32 : init_tokenbuffer (token_buffer *tokenbuffer)
46 : {
47 32 : tokenbuffer->size = 0;
48 32 : tokenbuffer->buffer = NULL;
49 32 : }
50 :
51 : /* Read a token from STREAM into TOKENBUFFER.
52 : A token is delimited by any of the N_DELIM bytes in DELIM.
53 : Upon return, the token is in tokenbuffer->buffer and
54 : has a trailing '\0' instead of any original delimiter.
55 : The function value is the length of the token not including
56 : the final '\0'. Upon EOF (i.e. on the call after the last
57 : token is read) or error, return -1 without modifying tokenbuffer.
58 : The EOF and error conditions may be distinguished in the caller
59 : by testing ferror (STREAM).
60 :
61 : This function works properly on lines containing NUL bytes
62 : and on files do not end with a delimiter. */
63 :
64 : size_t
65 89 : readtoken (FILE *stream,
66 : const char *delim,
67 : size_t n_delim,
68 : token_buffer *tokenbuffer)
69 : {
70 : char *p;
71 : int c;
72 : size_t i, n;
73 : static const char *saved_delim = NULL;
74 : static char isdelim[256];
75 : bool same_delimiters;
76 :
77 89 : if (delim == NULL && saved_delim == NULL)
78 0 : abort ();
79 :
80 89 : same_delimiters = false;
81 89 : if (delim != saved_delim && saved_delim != NULL)
82 : {
83 0 : same_delimiters = true;
84 0 : for (i = 0; i < n_delim; i++)
85 : {
86 0 : if (delim[i] != saved_delim[i])
87 : {
88 0 : same_delimiters = false;
89 0 : break;
90 : }
91 : }
92 : }
93 :
94 89 : if (!same_delimiters)
95 : {
96 : size_t j;
97 89 : saved_delim = delim;
98 89 : memset (isdelim, 0, sizeof isdelim);
99 356 : for (j = 0; j < n_delim; j++)
100 : {
101 267 : unsigned char ch = delim[j];
102 267 : isdelim[ch] = 1;
103 : }
104 : }
105 :
106 : /* FIXME: don't fool with this caching. Use strchr instead. */
107 : /* skip over any leading delimiters */
108 89 : for (c = getc (stream); c >= 0 && isdelim[c]; c = getc (stream))
109 : {
110 : /* empty */
111 : }
112 :
113 89 : p = tokenbuffer->buffer;
114 89 : n = tokenbuffer->size;
115 89 : i = 0;
116 : for (;;)
117 : {
118 493 : if (c < 0 && i == 0)
119 32 : return -1;
120 :
121 259 : if (i == n)
122 31 : p = x2nrealloc (p, &n, sizeof *p);
123 :
124 259 : if (c < 0)
125 : {
126 30 : p[i] = 0;
127 30 : break;
128 : }
129 229 : if (isdelim[c])
130 : {
131 27 : p[i] = 0;
132 27 : break;
133 : }
134 202 : p[i++] = c;
135 202 : c = getc (stream);
136 : }
137 :
138 57 : tokenbuffer->buffer = p;
139 57 : tokenbuffer->size = n;
140 57 : return i;
141 : }
142 :
143 : /* Build a NULL-terminated array of pointers to tokens
144 : read from STREAM. Return the number of tokens read.
145 : All storage is obtained through calls to xmalloc-like functions.
146 :
147 : %%% Question: is it worth it to do a single
148 : %%% realloc() of `tokens' just before returning? */
149 :
150 : size_t
151 0 : readtokens (FILE *stream,
152 : size_t projected_n_tokens,
153 : const char *delim,
154 : size_t n_delim,
155 : char ***tokens_out,
156 : size_t **token_lengths)
157 : {
158 0 : token_buffer tb, *token = &tb;
159 : char **tokens;
160 : size_t *lengths;
161 : size_t sz;
162 : size_t n_tokens;
163 :
164 0 : if (projected_n_tokens == 0)
165 0 : projected_n_tokens = 64;
166 : else
167 0 : projected_n_tokens++; /* add one for trailing NULL pointer */
168 :
169 0 : sz = projected_n_tokens;
170 0 : tokens = xnmalloc (sz, sizeof *tokens);
171 0 : lengths = xnmalloc (sz, sizeof *lengths);
172 :
173 0 : n_tokens = 0;
174 0 : init_tokenbuffer (token);
175 : for (;;)
176 0 : {
177 : char *tmp;
178 0 : size_t token_length = readtoken (stream, delim, n_delim, token);
179 0 : if (n_tokens >= sz)
180 : {
181 0 : tokens = x2nrealloc (tokens, &sz, sizeof *tokens);
182 0 : lengths = xnrealloc (lengths, sz, sizeof *lengths);
183 : }
184 :
185 0 : if (token_length == (size_t) -1)
186 : {
187 : /* don't increment n_tokens for NULL entry */
188 0 : tokens[n_tokens] = NULL;
189 0 : lengths[n_tokens] = 0;
190 0 : break;
191 : }
192 0 : tmp = xnmalloc (token_length + 1, sizeof *tmp);
193 0 : lengths[n_tokens] = token_length;
194 0 : tokens[n_tokens] = memcpy (tmp, token->buffer, token_length + 1);
195 0 : n_tokens++;
196 : }
197 :
198 0 : free (token->buffer);
199 0 : *tokens_out = tokens;
200 0 : if (token_lengths != NULL)
201 0 : *token_lengths = lengths;
202 0 : return n_tokens;
203 : }
|