LCOV - code coverage report
Current view: top level - lib - localcharset.c (source / functions) Hit Total Coverage
Test: coreutils.info Lines: 29 73 39.7 %
Date: 2018-01-30 Functions: 2 2 100.0 %

          Line data    Source code
       1             : /* -*- buffer-read-only: t -*- vi: set ro: */
       2             : /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
       3             : #line 1
       4             : /* Determine a canonical name for the current locale's character encoding.
       5             : 
       6             :    Copyright (C) 2000-2006, 2008 Free Software Foundation, Inc.
       7             : 
       8             :    This program is free software; you can redistribute it and/or modify
       9             :    it under the terms of the GNU General Public License as published by
      10             :    the Free Software Foundation; either version 3, or (at your option)
      11             :    any later version.
      12             : 
      13             :    This program is distributed in the hope that it will be useful,
      14             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16             :    GNU General Public License for more details.
      17             : 
      18             :    You should have received a copy of the GNU General Public License along
      19             :    with this program; if not, write to the Free Software Foundation,
      20             :    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
      21             : 
      22             : /* Written by Bruno Haible <bruno@clisp.org>.  */
      23             : 
      24             : #include <config.h>
      25             : 
      26             : /* Specification.  */
      27             : #include "localcharset.h"
      28             : 
      29             : #include <stddef.h>
      30             : #include <stdio.h>
      31             : #include <string.h>
      32             : #include <stdlib.h>
      33             : 
      34             : #if defined _WIN32 || defined __WIN32__
      35             : # define WIN32_NATIVE
      36             : #endif
      37             : 
      38             : #if defined __EMX__
      39             : /* Assume EMX program runs on OS/2, even if compiled under DOS.  */
      40             : # ifndef OS2
      41             : #  define OS2
      42             : # endif
      43             : #endif
      44             : 
      45             : #if !defined WIN32_NATIVE
      46             : # if HAVE_LANGINFO_CODESET
      47             : #  include <langinfo.h>
      48             : # else
      49             : #  if 0 /* see comment below */
      50             : #   include <locale.h>
      51             : #  endif
      52             : # endif
      53             : # ifdef __CYGWIN__
      54             : #  define WIN32_LEAN_AND_MEAN
      55             : #  include <windows.h>
      56             : # endif
      57             : #elif defined WIN32_NATIVE
      58             : # define WIN32_LEAN_AND_MEAN
      59             : # include <windows.h>
      60             : #endif
      61             : #if defined OS2
      62             : # define INCL_DOS
      63             : # include <os2.h>
      64             : #endif
      65             : 
      66             : #if ENABLE_RELOCATABLE
      67             : # include "relocatable.h"
      68             : #else
                       /* Without relocation support, relocate() yields its argument unchanged.  */
      69             : # define relocate(pathname) (pathname)
      70             : #endif
      71             : 
      72             : /* Get LIBDIR, the default directory in which charset.alias is looked up.  */
      73             : #ifndef LIBDIR
      74             : # include "configmake.h"
      75             : #endif
      76             : 
      77             : #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
      78             :   /* Win32, Cygwin, OS/2, DOS: both '/' and '\\' count as directory
                          separators.  */
      79             : # define ISSLASH(C) ((C) == '/' || (C) == '\\')
      80             : #endif
      81             : 
                       /* Separator inserted between a directory and a file name when the
                          directory does not already end in one.  */
      82             : #ifndef DIRECTORY_SEPARATOR
      83             : # define DIRECTORY_SEPARATOR '/'
      84             : #endif
      85             : 
                       /* Fallback when ISSLASH was not defined above: only DIRECTORY_SEPARATOR
                          counts as a separator.  */
      86             : #ifndef ISSLASH
      87             : # define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
      88             : #endif
      89             : 
                       /* Route getc through getc_unlocked when that function is declared.  */
      90             : #if HAVE_DECL_GETC_UNLOCKED
      91             : # undef getc
      92             : # define getc getc_unlocked
      93             : #endif
      94             : 
      95             : /* The following static variable is declared 'volatile' to avoid a
      96             :    possible multithread problem in the function get_charset_aliases. If we
      97             :    are running in a threaded environment, and if two threads initialize
      98             :    'charset_aliases' simultaneously, both will produce the same value,
      99             :    and everything will be ok if the two assignments to 'charset_aliases'
     100             :    are atomic. But I don't know what will happen if the two assignments mix.
                          When __STDC__ is not 1, the 'volatile' keyword is defined to nothing.  */
     101             : #if __STDC__ != 1
     102             : # define volatile /* empty */
     103             : #endif
     104             : /* Pointer to the contents of the charset.alias file, if it has already been
     105             :    read, else NULL.  Its format is:
     106             :    ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
     107             : static const char * volatile charset_aliases;
     108         321 : 
     109             : /* Return a pointer to the charset alias table, building it on first use.

                          The table is a sequence of NUL-terminated ALIAS, CANONICAL string
                          pairs, ended by an empty string.  The first call builds the table and
                          stores the pointer in 'charset_aliases'; later calls return that
                          stored pointer.

                          Unless compiled for VMS, native Win32 or Cygwin, the table is parsed
                          from the file "charset.alias".  The file is looked up in the directory
                          named by the environment variable CHARSETALIASDIR or, when that is
                          unset or empty, in relocate (LIBDIR).  If the file cannot be opened or
                          memory runs out, the table is the empty string "".  On VMS, native
                          Win32 and Cygwin a table compiled into this function is used instead.

                          The returned pointer is either a string literal or the cached buffer,
                          so callers do not free it.  */
     110             : static const char *
     111             : get_charset_aliases (void)
     112         321 : {
     113         321 :   const char *cp;
     114             : 
     115             :   cp = charset_aliases;
     116             :   if (cp == NULL)
     117             :     {
     118         317 : #if !(defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
     119             :       FILE *fp;
     120             :       const char *dir;
     121             :       const char *base = "charset.alias";
     122             :       char *file_name;
     123         317 : 
     124         317 :       /* Make it possible to override the charset.alias location.  This is
     125         317 :          necessary for running the testsuite before "make install".  */
     126             :       dir = getenv ("CHARSETALIASDIR");
     127             :       if (dir == NULL || dir[0] == '\0')
     128             :         dir = relocate (LIBDIR);
     129         317 : 
     130         317 :       /* Concatenate dir and base into freshly allocated file_name.  */
     131         317 :       {
     132         317 :         size_t dir_len = strlen (dir);
     133         317 :         size_t base_len = strlen (base);
                               /* A separator is added only when dir is non-empty and does not
                                  already end in one.  */
     134             :         int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
     135         317 :         file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
     136         317 :         if (file_name != NULL)
     137         317 :           {
     138         317 :             memcpy (file_name, dir, dir_len);
     139             :             if (add_slash)
     140             :               file_name[dir_len] = DIRECTORY_SEPARATOR;
                                   /* base_len + 1 also copies the terminating NUL of base.  */
     141             :             memcpy (file_name + dir_len + add_slash, base, base_len + 1);
     142         317 :           }
     143             :       }
     144         317 : 
     145             :       if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)
     146             :         /* Out of memory or file not found, treat it as empty.  */
     147             :         cp = "";
     148           0 :       else
     149           0 :         {
     150             :           /* Parse the file's contents.  res_ptr is the growing table and
                                    res_size its length, not counting the final NUL that ends the
                                    whole table.  */
     151             :           char *res_ptr = NULL;
     152           0 :           size_t res_size = 0;
     153             : 
     154             :           for (;;)
     155             :             {
     156             :               int c;
     157             :               char buf1[50+1];
     158             :               char buf2[50+1];
     159           0 :               size_t l1, l2;
     160           0 :               char *old_res_ptr;
     161           0 : 
     162           0 :               c = getc (fp);
     163           0 :               if (c == EOF)
     164           0 :                 break;
                                     /* Skip newlines, spaces and tabs before an entry.  */
     165             :               if (c == '\n' || c == ' ' || c == '\t')
     166             :                 continue;
     167             :               if (c == '#')
     168           0 :                 {
     169           0 :                   /* Skip comment, to end of line.  */
     170           0 :                   do
     171           0 :                     c = getc (fp);
     172           0 :                   while (!(c == EOF || c == '\n'));
     173             :                   if (c == EOF)
     174           0 :                     break;
     175           0 :                   continue;
     176           0 :                 }
                                     /* Push back the first character of the entry, then read two
                                        whitespace-delimited tokens of at most 50 characters each:
                                        the alias into buf1, the canonical name into buf2.  Stop
                                        parsing if fewer than two tokens could be read.  */
     177           0 :               ungetc (c, fp);
     178           0 :               if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
     179           0 :                 break;
     180           0 :               l1 = strlen (buf1);
     181             :               l2 = strlen (buf2);
                                     /* Remember the old buffer so it can be freed if growing
                                        fails.  The "+ 1" in each allocation leaves room for the NUL
                                        that ends the whole table.  */
     182           0 :               old_res_ptr = res_ptr;
     183           0 :               if (res_size == 0)
     184             :                 {
     185             :                   res_size = l1 + 1 + l2 + 1;
     186             :                   res_ptr = (char *) malloc (res_size + 1);
     187           0 :                 }
     188           0 :               else
     189             :                 {
     190           0 :                   res_size += l1 + 1 + l2 + 1;
     191             :                   res_ptr = (char *) realloc (res_ptr, res_size + 1);
     192             :                 }
     193           0 :               if (res_ptr == NULL)
     194           0 :                 {
     195           0 :                   /* Out of memory.  Everything parsed so far is discarded;
                                            res_size = 0 makes the table empty below.  */
     196           0 :                   res_size = 0;
     197             :                   if (old_res_ptr != NULL)
     198           0 :                     free (old_res_ptr);
     199           0 :                   break;
     200             :                 }
                                     /* Append the alias and the canonical name, each with its NUL,
                                        in the space just added at the end of the buffer.  */
     201           0 :               strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
     202           0 :               strcpy (res_ptr + res_size - (l2 + 1), buf2);
     203           0 :             }
     204             :           fclose (fp);
     205             :           if (res_size == 0)
     206           0 :             cp = "";
     207           0 :           else
     208             :             {
                                     /* End the table with an empty string.  */
     209             :               *(res_ptr + res_size) = '\0';
     210             :               cp = res_ptr;
     211         317 :             }
     212         317 :         }
     213             : 
     214             :       if (file_name != NULL)
     215             :         free (file_name);
     216             : 
     217             : #else
     218             : 
     219             : # if defined VMS
     220             :       /* To avoid the troubles of an extra file charset.alias_vms in the
     221             :          sources of many GNU packages, simply inline the aliases here.  */
     222             :       /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
     223             :          "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
     224             :          section 10.7 "Handling Different Character Sets".  */
     225             :       cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
     226             :            "ISO8859-2" "\0" "ISO-8859-2" "\0"
     227             :            "ISO8859-5" "\0" "ISO-8859-5" "\0"
     228             :            "ISO8859-7" "\0" "ISO-8859-7" "\0"
     229             :            "ISO8859-8" "\0" "ISO-8859-8" "\0"
     230             :            "ISO8859-9" "\0" "ISO-8859-9" "\0"
     231             :            /* Japanese */
     232             :            "eucJP" "\0" "EUC-JP" "\0"
     233             :            "SJIS" "\0" "SHIFT_JIS" "\0"
     234             :            "DECKANJI" "\0" "DEC-KANJI" "\0"
     235             :            "SDECKANJI" "\0" "EUC-JP" "\0"
     236             :            /* Chinese */
     237             :            "eucTW" "\0" "EUC-TW" "\0"
     238             :            "DECHANYU" "\0" "DEC-HANYU" "\0"
     239             :            "DECHANZI" "\0" "GB2312" "\0"
     240             :            /* Korean */
     241             :            "DECKOREAN" "\0" "EUC-KR" "\0";
     242             : # endif
     243             : 
     244             : # if defined WIN32_NATIVE || defined __CYGWIN__
     245             :       /* To avoid the troubles of installing a separate file in the same
     246             :          directory as the DLL and of retrieving the DLL's directory at
     247             :          runtime, simply inline the aliases here.  */
     248             : 
     249             :       cp = "CP936" "\0" "GBK" "\0"
     250             :            "CP1361" "\0" "JOHAB" "\0"
     251             :            "CP20127" "\0" "ASCII" "\0"
     252             :            "CP20866" "\0" "KOI8-R" "\0"
     253             :            "CP20936" "\0" "GB2312" "\0"
     254             :            "CP21866" "\0" "KOI8-RU" "\0"
     255             :            "CP28591" "\0" "ISO-8859-1" "\0"
     256             :            "CP28592" "\0" "ISO-8859-2" "\0"
     257             :            "CP28593" "\0" "ISO-8859-3" "\0"
     258             :            "CP28594" "\0" "ISO-8859-4" "\0"
     259             :            "CP28595" "\0" "ISO-8859-5" "\0"
     260             :            "CP28596" "\0" "ISO-8859-6" "\0"
     261             :            "CP28597" "\0" "ISO-8859-7" "\0"
     262             :            "CP28598" "\0" "ISO-8859-8" "\0"
     263             :            "CP28599" "\0" "ISO-8859-9" "\0"
     264             :            "CP28605" "\0" "ISO-8859-15" "\0"
     265             :            "CP38598" "\0" "ISO-8859-8" "\0"
     266             :            "CP51932" "\0" "EUC-JP" "\0"
     267             :            "CP51936" "\0" "GB2312" "\0"
     268             :            "CP51949" "\0" "EUC-KR" "\0"
     269             :            "CP51950" "\0" "EUC-TW" "\0"
     270             :            "CP54936" "\0" "GB18030" "\0"
     271             :            "CP65001" "\0" "UTF-8" "\0";
     272         317 : # endif
     273             : #endif
     274             : 
                             /* Store the table so that later calls return it directly.  */
     275         321 :       charset_aliases = cp;
     276             :     }
     277             : 
     278             :   return cp;
     279             : }
     280             : 
     281             : /* Determine the current locale's character encoding, and canonicalize it
     282             :    into one of the canonical names listed in config.charset.
     283             :    The result must not be freed; it is statically allocated.
     284             :    If the canonical name cannot be determined, the result is a non-canonical
     285             :    name.  The result is never the empty string: "ASCII" is returned in
                          that case.

                          The raw name is obtained per platform: nl_langinfo (CODESET) where
                          available, otherwise the first non-empty of the environment variables
                          LC_ALL, LC_CTYPE, LANG; "CP<number>" from GetACP on native Win32; the
                          environment variables or "CP<number>" from DosQueryCp on OS/2.  It is
                          then looked up in the table returned by get_charset_aliases.

                          On Cygwin (when nl_langinfo reports "US-ASCII") and on OS/2, an
                          encoding found after the '.' in the locale environment variable is
                          returned as is, without being looked up in the alias table.  */
     286             : 
     287             : #ifdef STATIC
     288         321 : STATIC
     289             : #endif
     290             : const char *
     291             : locale_charset (void)
     292             : {
     293             :   const char *codeset;
     294             :   const char *aliases;
     295             : 
     296             : #if !(defined WIN32_NATIVE || defined OS2)
     297             : 
     298         321 : # if HAVE_LANGINFO_CODESET
     299             : 
     300             :   /* Most systems support nl_langinfo (CODESET) nowadays.  */
     301             :   codeset = nl_langinfo (CODESET);
     302             : 
     303             : #  ifdef __CYGWIN__
     304             :   /* Cygwin 2006 does not have locales.  nl_langinfo (CODESET) always
     305             :      returns "US-ASCII".  As long as this is not fixed, return the suffix
     306             :      of the locale name from the environment variables (if present) or
     307             :      the codepage as a number.  */
     308             :   if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
     309             :     {
     310             :       const char *locale;
                             /* Sized for "CP", up to 10 digits and the terminating NUL.  */
     311             :       static char buf[2 + 10 + 1];
     312             : 
                             /* Precedence: LC_ALL, then LC_CTYPE, then LANG.  */
     313             :       locale = getenv ("LC_ALL");
     314             :       if (locale == NULL || locale[0] == '\0')
     315             :         {
     316             :           locale = getenv ("LC_CTYPE");
     317             :           if (locale == NULL || locale[0] == '\0')
     318             :             locale = getenv ("LANG");
     319             :         }
     320             :       if (locale != NULL && locale[0] != '\0')
     321             :         {
     322             :           /* If the locale name contains an encoding after the dot, return
     323             :              it.  */
     324             :           const char *dot = strchr (locale, '.');
     325             : 
     326             :           if (dot != NULL)
     327             :             {
     328             :               const char *modifier;
     329             : 
     330             :               dot++;
     331             :               /* Look for the possible @... trailer and remove it, if any.  */
     332             :               modifier = strchr (dot, '@');
     333             :               if (modifier == NULL)
     334             :                 return dot;
                                     /* Copy the encoding without the trailer only if it fits in
                                        buf together with its NUL; otherwise fall through to the
                                        codepage number below.  */
     335             :               if (modifier - dot < sizeof (buf))
     336             :                 {
     337             :                   memcpy (buf, dot, modifier - dot);
     338             :                   buf [modifier - dot] = '\0';
     339             :                   return buf;
     340             :                 }
     341             :             }
     342             :         }
     343             : 
     344             :       /* Woe32 has a function returning the locale's codepage as a number.  */
     345             :       sprintf (buf, "CP%u", GetACP ());
     346             :       codeset = buf;
     347             :     }
     348             : #  endif
     349             : 
     350             : # else
     351             : 
     352             :   /* On old systems which lack it, use setlocale or getenv.  */
     353             :   const char *locale = NULL;
     354             : 
     355             :   /* But most old systems don't have a complete set of locales.  Some
     356             :      (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
     357             :      use setlocale here; it would return "C" when it doesn't support the
     358             :      locale name the user has set.  */
     359             : #  if 0
     360             :   locale = setlocale (LC_CTYPE, NULL);
     361             : #  endif
                         /* Precedence: LC_ALL, then LC_CTYPE, then LANG.  */
     362             :   if (locale == NULL || locale[0] == '\0')
     363             :     {
     364             :       locale = getenv ("LC_ALL");
     365             :       if (locale == NULL || locale[0] == '\0')
     366             :         {
     367             :           locale = getenv ("LC_CTYPE");
     368             :           if (locale == NULL || locale[0] == '\0')
     369             :             locale = getenv ("LANG");
     370             :         }
     371             :     }
     372             : 
     373             :   /* On some old systems, one used to set locale = "iso8859_1". On others,
     374             :      you set it to "language_COUNTRY.charset". In any case, we resolve it
     375             :      through the charset.alias file.  */
     376             :   codeset = locale;
     377             : 
     378             : # endif
     379             : 
     380             : #elif defined WIN32_NATIVE
     381             : 
                         /* Sized for "CP", up to 10 digits and the terminating NUL.  */
     382             :   static char buf[2 + 10 + 1];
     383             : 
     384             :   /* Woe32 has a function returning the locale's codepage as a number.  */
     385             :   sprintf (buf, "CP%u", GetACP ());
     386             :   codeset = buf;
     387             : 
     388             : #elif defined OS2
     389             : 
     390             :   const char *locale;
                         /* Sized for "CP", up to 10 digits and the terminating NUL.  */
     391             :   static char buf[2 + 10 + 1];
     392             :   ULONG cp[3];
     393             :   ULONG cplen;
     394             : 
     395             :   /* Allow user to override the codeset, as set in the operating system,
     396             :      with standard language environment variables.  */
     397             :   locale = getenv ("LC_ALL");
     398             :   if (locale == NULL || locale[0] == '\0')
     399             :     {
     400             :       locale = getenv ("LC_CTYPE");
     401             :       if (locale == NULL || locale[0] == '\0')
     402             :         locale = getenv ("LANG");
     403             :     }
     404             :   if (locale != NULL && locale[0] != '\0')
     405             :     {
     406             :       /* If the locale name contains an encoding after the dot, return it.  */
     407             :       const char *dot = strchr (locale, '.');
     408             : 
     409             :       if (dot != NULL)
     410             :         {
     411             :           const char *modifier;
     412             : 
     413             :           dot++;
     414             :           /* Look for the possible @... trailer and remove it, if any.  */
     415             :           modifier = strchr (dot, '@');
     416             :           if (modifier == NULL)
     417             :             return dot;
                                 /* Copy the encoding without the trailer only if it fits in buf
                                    together with its NUL; otherwise fall through and resolve
                                    the whole locale name through the alias table.  */
     418             :           if (modifier - dot < sizeof (buf))
     419             :             {
     420             :               memcpy (buf, dot, modifier - dot);
     421             :               buf [modifier - dot] = '\0';
     422             :               return buf;
     423             :             }
     424             :         }
     425             : 
     426             :       /* Resolve through the charset.alias file.  */
     427             :       codeset = locale;
     428             :     }
     429             :   else
     430             :     {
     431             :       /* OS/2 has a function returning the locale's codepage as a number.
                                A nonzero result from DosQueryCp is treated as failure.  */
     432             :       if (DosQueryCp (sizeof (cp), cp, &cplen))
     433             :         codeset = "";
     434             :       else
     435             :         {
     436             :           sprintf (buf, "CP%u", cp[0]);
     437             :           codeset = buf;
     438             :         }
     439             :     }
     440         321 : 
     441             : #endif
     442           0 : 
     443             :   if (codeset == NULL)
     444             :     /* The canonical name cannot be determined.  */
     445         642 :     codeset = "";
     446         321 : 
     447           0 :   /* Resolve alias.  The table is a list of ALIAS, CANONICAL string pairs
                            ended by an empty string; each iteration steps over one pair.  An
                            alias consisting of "*" alone matches any codeset.  */
     448           0 :   for (aliases = get_charset_aliases ();
     449           0 :        *aliases != '\0';
     450             :        aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
     451           0 :     if (strcmp (codeset, aliases) == 0
     452           0 :         || (aliases[0] == '*' && aliases[1] == '\0'))
     453             :       {
                               /* The canonical name is stored right after the alias's NUL.  */
     454             :         codeset = aliases + strlen (aliases) + 1;
     455             :         break;
     456             :       }
     457             : 
     458         321 :   /* Don't return an empty string.  GNU libc and GNU libiconv interpret
     459           0 :      the empty string as denoting "the locale's character encoding",
     460             :      thus GNU libiconv would call this function a second time.  */
     461         321 :   if (codeset[0] == '\0')
     462             :     codeset = "ASCII";
     463             : 
     464             :   return codeset;
     465             : }

Generated by: LCOV version 1.10