LCOV - code coverage report
Current view: top level - lib - unicodeio.c (source / functions) Hit Total Coverage
Test: coreutils.info Lines: 0 68 0.0 %
Date: 2018-01-30 Functions: 0 6 0.0 %

          Line data    Source code
       1             : /* Unicode character output to streams with locale dependent encoding.
       2             : 
       3             :    Copyright (C) 2000-2003, 2006 Free Software Foundation, Inc.
       4             : 
       5             :    This program is free software: you can redistribute it and/or modify
       6             :    it under the terms of the GNU General Public License as published by
       7             :    the Free Software Foundation; either version 3 of the License, or
       8             :    (at your option) any later version.
       9             : 
      10             :    This program is distributed in the hope that it will be useful,
      11             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13             :    GNU General Public License for more details.
      14             : 
      15             :    You should have received a copy of the GNU General Public License
      16             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
      17             : 
      18             : /* Written by Bruno Haible <haible@clisp.cons.org>.  */
      19             : 
      20             : /* Note: This file requires the locale_charset() function.  See in
      21             :    libiconv-1.8/libcharset/INTEGRATE for how to obtain it.  */
      22             : 
      23             : #include <config.h>
      24             : 
      25             : /* Specification.  */
      26             : #include "unicodeio.h"
      27             : 
      28             : #include <stdio.h>
      29             : #include <string.h>
      30             : #include <errno.h>
      31             : 
      32             : #if HAVE_ICONV
      33             : # include <iconv.h>
      34             : #endif
      35             : 
      36             : #include <error.h>
      37             : 
      38             : #include "gettext.h"
      39             : #define _(msgid) gettext (msgid)
      40             : #define N_(msgid) msgid
      41             : 
      42             : #include "localcharset.h"
      43             : 
      44             : /* When we pass a Unicode character to iconv(), we must pass it in a
      45             :    suitable encoding. The standardized Unicode encodings are
      46             :    UTF-8, UCS-2, UCS-4, UTF-16, UTF-16BE, UTF-16LE, UTF-7.
      47             :    UCS-2 supports only characters up to \U0000FFFF.
      48             :    UTF-16 and variants support only characters up to \U0010FFFF.
      49             :    UTF-7 is way too complex and not supported by glibc-2.1.
      50             :    UCS-4 specification leaves doubts about endianness and byte order
      51             :    mark. glibc currently interprets it as big endian without byte order
      52             :    mark, but this is not backed by an RFC.
      53             :    So we use UTF-8. It supports characters up to \U7FFFFFFF and is
      54             :    unambiguously defined.  */
      55             : 
      56             : /* Stores the UTF-8 representation of the Unicode character wc in r[0..5].
      57             :    Returns the number of bytes stored, or -1 if wc is out of range.  */
      58             : static int
      59           0 : utf8_wctomb (unsigned char *r, unsigned int wc)
      60             : {
      61             :   int count;
      62             : 
      63           0 :   if (wc < 0x80)
      64           0 :     count = 1;
      65           0 :   else if (wc < 0x800)
      66           0 :     count = 2;
      67           0 :   else if (wc < 0x10000)
      68           0 :     count = 3;
      69           0 :   else if (wc < 0x200000)
      70           0 :     count = 4;
      71           0 :   else if (wc < 0x4000000)
      72           0 :     count = 5;
      73           0 :   else if (wc <= 0x7fffffff)
      74           0 :     count = 6;
      75             :   else
      76           0 :     return -1;
      77             : 
      78           0 :   switch (count)
      79             :     {
      80             :       /* Note: code falls through cases! */
      81           0 :       case 6: r[5] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x4000000;
      82           0 :       case 5: r[4] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x200000;
      83           0 :       case 4: r[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
      84           0 :       case 3: r[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
      85           0 :       case 2: r[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
      86           0 :       case 1: r[0] = wc;
      87             :     }
      88             : 
      89           0 :   return count;
      90             : }
      91             : 
      92             : /* Luckily, the encoding's name is platform independent.  */
      93             : #define UTF8_NAME "UTF-8"
      94             : 
      95             : /* Converts the Unicode character CODE to its multibyte representation
      96             :    in the current locale and calls the SUCCESS callback on the resulting
      97             :    byte sequence.  If an error occurs, invokes the FAILURE callback instead,
      98             :    passing it CODE and an English error string.
      99             :    Returns whatever the callback returned.
     100             :    Assumes that the locale doesn't change between two calls.  */
     101             : long
     102           0 : unicode_to_mb (unsigned int code,
     103             :                long (*success) (const char *buf, size_t buflen,
     104             :                                 void *callback_arg),
     105             :                long (*failure) (unsigned int code, const char *msg,
     106             :                                 void *callback_arg),
     107             :                void *callback_arg)
     108             : {
     109             :   static int initialized;
     110             :   static int is_utf8;
     111             : #if HAVE_ICONV
     112             :   static iconv_t utf8_to_local;
     113             : #endif
     114             : 
     115             :   char inbuf[6];
     116             :   int count;
     117             : 
     118           0 :   if (!initialized)
     119             :     {
     120           0 :       const char *charset = locale_charset ();
     121             : 
     122           0 :       is_utf8 = !strcmp (charset, UTF8_NAME);
     123             : #if HAVE_ICONV
     124           0 :       if (!is_utf8)
     125             :         {
     126           0 :           utf8_to_local = iconv_open (charset, UTF8_NAME);
     127           0 :           if (utf8_to_local == (iconv_t)(-1))
     128             :             /* For an unknown encoding, assume ASCII.  */
     129           0 :             utf8_to_local = iconv_open ("ASCII", UTF8_NAME);
     130             :         }
     131             : #endif
     132           0 :       initialized = 1;
     133             :     }
     134             : 
     135             :   /* Test whether the utf8_to_local converter is available at all.  */
     136           0 :   if (!is_utf8)
     137             :     {
     138             : #if HAVE_ICONV
     139           0 :       if (utf8_to_local == (iconv_t)(-1))
     140           0 :         return failure (code, N_("iconv function not usable"), callback_arg);
     141             : #else
     142             :       return failure (code, N_("iconv function not available"), callback_arg);
     143             : #endif
     144             :     }
     145             : 
     146             :   /* Convert the character to UTF-8.  */
     147           0 :   count = utf8_wctomb ((unsigned char *) inbuf, code);
     148           0 :   if (count < 0)
     149           0 :     return failure (code, N_("character out of range"), callback_arg);
     150             : 
     151             : #if HAVE_ICONV
     152           0 :   if (!is_utf8)
     153             :     {
     154             :       char outbuf[25];
     155             :       const char *inptr;
     156             :       size_t inbytesleft;
     157             :       char *outptr;
     158             :       size_t outbytesleft;
     159             :       size_t res;
     160             : 
     161           0 :       inptr = inbuf;
     162           0 :       inbytesleft = count;
     163           0 :       outptr = outbuf;
     164           0 :       outbytesleft = sizeof (outbuf);
     165             : 
     166             :       /* Convert the character from UTF-8 to the locale's charset.  */
     167           0 :       res = iconv (utf8_to_local,
     168             :                    (ICONV_CONST char **)&inptr, &inbytesleft,
     169             :                    &outptr, &outbytesleft);
     170           0 :       if (inbytesleft > 0 || res == (size_t)(-1)
     171             :           /* Irix iconv() inserts a NUL byte if it cannot convert. */
     172             : # if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
     173             :           || (res > 0 && code != 0 && outptr - outbuf == 1 && *outbuf == '\0')
     174             : # endif
     175             :          )
     176           0 :         return failure (code, NULL, callback_arg);
     177             : 
     178             :       /* Avoid glibc-2.1 bug and Solaris 7 bug.  */
     179             : # if defined _LIBICONV_VERSION \
     180             :     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
     181             : 
     182             :       /* Get back to the initial shift state.  */
     183           0 :       res = iconv (utf8_to_local, NULL, NULL, &outptr, &outbytesleft);
     184           0 :       if (res == (size_t)(-1))
     185           0 :         return failure (code, NULL, callback_arg);
     186             : # endif
     187             : 
     188           0 :       return success (outbuf, outptr - outbuf, callback_arg);
     189             :     }
     190             : #endif
     191             : 
     192             :   /* At this point, is_utf8 is true, so no conversion is needed.  */
     193           0 :   return success (inbuf, count, callback_arg);
     194             : }
     195             : 
     196             : /* Simple success callback that outputs the converted string.
     197             :    The STREAM is passed as callback_arg.  */
     198             : long
     199           0 : fwrite_success_callback (const char *buf, size_t buflen, void *callback_arg)
     200             : {
     201           0 :   FILE *stream = (FILE *) callback_arg;
     202             : 
     203           0 :   fwrite (buf, 1, buflen, stream);
     204           0 :   return 0;
     205             : }
     206             : 
     207             : /* Simple failure callback that displays an error and exits.  */
     208             : static long
     209           0 : exit_failure_callback (unsigned int code, const char *msg, void *callback_arg)
     210             : {
     211           0 :   if (msg == NULL)
     212           0 :     error (1, 0, _("cannot convert U+%04X to local character set"), code);
     213             :   else
     214           0 :     error (1, 0, _("cannot convert U+%04X to local character set: %s"), code,
     215             :            gettext (msg));
     216           0 :   return -1;
     217             : }
     218             : 
     219             : /* Simple failure callback that displays a fallback representation in plain
     220             :    ASCII, using the same notation as ISO C99 strings.  */
     221             : static long
     222           0 : fallback_failure_callback (unsigned int code, const char *msg, void *callback_arg)
     223             : {
     224           0 :   FILE *stream = (FILE *) callback_arg;
     225             : 
     226           0 :   if (code < 0x10000)
     227           0 :     fprintf (stream, "\\u%04X", code);
     228             :   else
     229           0 :     fprintf (stream, "\\U%08X", code);
     230           0 :   return -1;
     231             : }
     232             : 
     233             : /* Outputs the Unicode character CODE to the output stream STREAM.
     234             :    Upon failure, exit if exit_on_error is true, otherwise output a fallback
     235             :    notation.  */
     236             : void
     237           0 : print_unicode_char (FILE *stream, unsigned int code, int exit_on_error)
     238             : {
     239           0 :   unicode_to_mb (code, fwrite_success_callback,
     240             :                  exit_on_error
     241             :                  ? exit_failure_callback
     242             :                  : fallback_failure_callback,
     243             :                  stream);
     244           0 : }

Generated by: LCOV version 1.10