Line data Source code
1 : /* Determine the number of screen columns needed for a string.
2 : Copyright (C) 2000-2007 Free Software Foundation, Inc.
3 :
4 : This program is free software: you can redistribute it and/or modify
5 : it under the terms of the GNU General Public License as published by
6 : the Free Software Foundation; either version 3 of the License, or
7 : (at your option) any later version.
8 :
9 : This program is distributed in the hope that it will be useful,
10 : but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : GNU General Public License for more details.
13 :
14 : You should have received a copy of the GNU General Public License
15 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
16 :
17 : /* Written by Bruno Haible <haible@clisp.cons.org>. */
18 :
#include <config.h>

/* Specification.  */
#include "mbswidth.h"

/* Get INT_MAX.  */
#include <limits.h>

/* Get MB_CUR_MAX.  */
#include <stdlib.h>

#include <string.h>

/* Get isprint().  */
#include <ctype.h>

/* Get mbstate_t, mbrtowc(), mbsinit(), wcwidth().  */
#include <wchar.h>

/* Get iswcntrl().  */
#include <wctype.h>
37 :
/* Fallback for platforms without mbsinit(): report every conversion state
   as being the initial state.  The multibyte scanning loop in this file
   repeats only while mbsinit() yields zero, so with this definition it
   handles exactly one mbrtowc() result per pass.  */
#if !defined mbsinit && !HAVE_MBSINIT
# define mbsinit(ps) 1
#endif
43 :
/* Returns the number of screen columns occupied by the NUL-terminated
   multibyte string STRING.

   This measures the string with strlen and hands the bytes to mbsnwidth,
   so FLAGS and the return value mean exactly what they mean there: when a
   non-printable character occurs and MBSW_REJECT_UNPRINTABLE is set, the
   result is -1.  With FLAGS = MBSW_REJECT_INVALID | MBSW_REJECT_UNPRINTABLE
   this is the multibyte analogue of the wcswidth function.
   The behavior for extremely long strings is that of mbsnwidth.  */
int
mbswidth (const char *string, int flags)
{
  size_t length = strlen (string);

  return mbsnwidth (string, length, flags);
}
55 :
56 : /* Returns the number of columns needed to represent the multibyte
57 : character string pointed to by STRING of length NBYTES. If a
58 : non-printable character occurs, and MBSW_REJECT_UNPRINTABLE is
59 : specified, -1 is returned.
60 : If NBYTES is not < INT_MAX / 2, integer overflow can occur. */
61 : int
62 195 : mbsnwidth (const char *string, size_t nbytes, int flags)
63 : {
64 195 : const char *p = string;
65 195 : const char *plimit = p + nbytes;
66 : int width;
67 :
68 195 : width = 0;
69 : #if HAVE_MBRTOWC
70 195 : if (MB_CUR_MAX > 1)
71 : {
72 0 : while (p < plimit)
73 0 : switch (*p)
74 : {
75 0 : case ' ': case '!': case '"': case '#': case '%':
76 : case '&': case '\'': case '(': case ')': case '*':
77 : case '+': case ',': case '-': case '.': case '/':
78 : case '0': case '1': case '2': case '3': case '4':
79 : case '5': case '6': case '7': case '8': case '9':
80 : case ':': case ';': case '<': case '=': case '>':
81 : case '?':
82 : case 'A': case 'B': case 'C': case 'D': case 'E':
83 : case 'F': case 'G': case 'H': case 'I': case 'J':
84 : case 'K': case 'L': case 'M': case 'N': case 'O':
85 : case 'P': case 'Q': case 'R': case 'S': case 'T':
86 : case 'U': case 'V': case 'W': case 'X': case 'Y':
87 : case 'Z':
88 : case '[': case '\\': case ']': case '^': case '_':
89 : case 'a': case 'b': case 'c': case 'd': case 'e':
90 : case 'f': case 'g': case 'h': case 'i': case 'j':
91 : case 'k': case 'l': case 'm': case 'n': case 'o':
92 : case 'p': case 'q': case 'r': case 's': case 't':
93 : case 'u': case 'v': case 'w': case 'x': case 'y':
94 : case 'z': case '{': case '|': case '}': case '~':
95 : /* These characters are printable ASCII characters. */
96 0 : p++;
97 0 : width++;
98 0 : break;
99 0 : default:
100 : /* If we have a multibyte sequence, scan it up to its end. */
101 : {
102 : mbstate_t mbstate;
103 0 : memset (&mbstate, 0, sizeof mbstate);
104 : do
105 : {
106 : wchar_t wc;
107 : size_t bytes;
108 : int w;
109 :
110 0 : bytes = mbrtowc (&wc, p, plimit - p, &mbstate);
111 :
112 0 : if (bytes == (size_t) -1)
113 : /* An invalid multibyte sequence was encountered. */
114 : {
115 0 : if (!(flags & MBSW_REJECT_INVALID))
116 : {
117 0 : p++;
118 0 : width++;
119 0 : break;
120 : }
121 : else
122 0 : return -1;
123 : }
124 :
125 0 : if (bytes == (size_t) -2)
126 : /* An incomplete multibyte character at the end. */
127 : {
128 0 : if (!(flags & MBSW_REJECT_INVALID))
129 : {
130 0 : p = plimit;
131 0 : width++;
132 0 : break;
133 : }
134 : else
135 0 : return -1;
136 : }
137 :
138 0 : if (bytes == 0)
139 : /* A null wide character was encountered. */
140 0 : bytes = 1;
141 :
142 0 : w = wcwidth (wc);
143 0 : if (w >= 0)
144 : /* A printable multibyte character. */
145 0 : width += w;
146 : else
147 : /* An unprintable multibyte character. */
148 0 : if (!(flags & MBSW_REJECT_UNPRINTABLE))
149 0 : width += (iswcntrl (wc) ? 0 : 1);
150 : else
151 0 : return -1;
152 :
153 0 : p += bytes;
154 : }
155 0 : while (! mbsinit (&mbstate));
156 : }
157 0 : break;
158 : }
159 0 : return width;
160 : }
161 : #endif
162 :
163 1802 : while (p < plimit)
164 : {
165 1412 : unsigned char c = (unsigned char) *p++;
166 :
167 1412 : if (isprint (c))
168 1412 : width++;
169 0 : else if (!(flags & MBSW_REJECT_UNPRINTABLE))
170 0 : width += (iscntrl (c) ? 0 : 1);
171 : else
172 0 : return -1;
173 : }
174 195 : return width;
175 : }
|