index  prev  next

The Horror of multi-byte

--------------------------------------------------------------------------------------

The end result was something that was far too complex to live. Here is our strcasecmp() function from Samba with Japanese support:

/*
 * Case-insensitive comparison of two NUL-terminated strings.
 *
 * When the client code page is KANJI_CODEPAGE the strings are walked as
 * Shift-JIS text: bytes for which is_shift_jis() is true are treated as the
 * lead byte of a two-byte character and compared pairwise, everything else
 * is compared byte-by-byte through toupper().  For every other code page a
 * plain toupper() comparison is used.
 *
 * s, t: the strings to compare; neither is modified.
 *
 * Returns 0 when the strings compare equal under these rules, otherwise a
 * non-zero difference: negative when s orders before t, positive when it
 * orders after.  When a two-byte character in one string lines up with a
 * single-byte character in the other, the string holding the two-byte
 * character orders after (the result is exactly 1 or -1).
 */
int StrCaseCmp(const char *s, const char *t)
{
  /* compare until we run out of string, either t or s, or find a difference */
  /* We *must* use toupper rather than tolower here due to the
     asymmetric upper to lower mapping.
   */
  /*
   * For completeness we should put in equivalent code for code pages
   * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
   * doubt anyone wants Samba to behave differently from Win95 and WinNT
   * here. They both treat full width ascii characters as case sensitive
   * filenames (ie. they don't do the work we do here).
   * JRA.
   */
  /*
   * Every byte handed to toupper() is first converted to unsigned char:
   * where plain char is signed, a byte >= 0x80 would otherwise be passed
   * as a negative value, which the standard <ctype.h> toupper() does not
   * accept.
   */

  if (lp_client_code_page() == KANJI_CODEPAGE)
  {
    /* Win95 treats full width ascii characters as case sensitive. */
    int diff;
    for (;;)
    {
      if (!*s || !*t)
        return toupper ((unsigned char) *s) - toupper ((unsigned char) *t);
      else if (is_shift_jis (*s) && is_shift_jis (*t))
      {
        /* Both strings are positioned on a two-byte character. */
        diff = ((int)(unsigned char) *s) - ((int)(unsigned char) *t);

        /*
         * A lead byte followed directly by the terminator is a truncated
         * two-byte character.  Decide the result here: the code below
         * advances both pointers by two and would step past the
         * terminating NUL.
         */
        if (!*(s+1) || !*(t+1))
        {
          if (!diff)
            diff = ((int)(unsigned char) *(s+1)) - ((int)(unsigned char) *(t+1));
          return diff;
        }

        if (diff) {
          /* Lead bytes differ: compare the regularized 16-bit values. */
          int w1, w2;

          w1 = ((int)(unsigned char) *s << 8) | (unsigned char) *(s+1);
          w2 = ((int)(unsigned char) *t << 8) | (unsigned char) *(t+1);
          w1 = regularize_sjis(w1);
          w2 = regularize_sjis(w2);
          /*
           * NOTE(review): when w1 == w2 here, diff still holds the non-zero
           * lead-byte difference and is returned by the "if (diff)" check
           * after this if/else chain, so two encodings that regularize to
           * the same value still compare unequal - confirm this is intended.
           */
          if (w1 != w2) {
#if !defined(KANJI_WIN95_COMPATIBILITY)
            int c1, c2;

            c1 = (w1 >> 8) & 0xff;
            c2 = (w2 >> 8) & 0xff;
            if (!is_sj_roman (c1) || !is_sj_roman (c2))
              return diff;
            diff = sj_ro_toupper2 (w1 & 0xff) - sj_ro_toupper2 (w2 & 0xff);
            /* This "if" deliberately guards the return below the #endif. */
            if (diff)
#endif /* KANJI_WIN95_COMPATIBILITY */
              return diff;
          }
        }
        /*
         * Lead bytes are identical from here on: compare the trail bytes,
         * using the upper-casing helper that matches the lead byte's class.
         */
#if !defined(KANJI_WIN95_COMPATIBILITY)
        else if (is_sj_alph (*s))
        {
          diff = sj_toupper2 (*(s+1)) - sj_toupper2 (*(t+1));
        }
        else if (is_sj_greek (*s))
        {
          diff = sj_gr_toupper2 (*(s+1)) - sj_gr_toupper2 (*(t+1));
        }
        else if (is_sj_roman (*s))
        {
          diff = sj_ro_toupper2 (*(s+1)) - sj_ro_toupper2 (*(t+1));
        }
#endif /* KANJI_WIN95_COMPATIBILITY */
        else if (is_sj_russian(*s))
        {
          diff = sj_ru_toupper2 (*(s+1)) - sj_ru_toupper2 (*(t+1));
        }
        else {
          /* No case mapping applies: compare the raw trail bytes. */
          diff = ((int)(unsigned char) *(s+1)) - ((int)(unsigned char) *(t+1));
        }
        if (diff)
          return diff;
        s += 2;
        t += 2;
      }
      else if (is_shift_jis (*s))
        return 1;
      else if (is_shift_jis (*t))
        return -1;
      else
      {
        /* Two single-byte characters. */
        diff = toupper ((unsigned char) *s) - toupper ((unsigned char) *t);
        if (diff)
          return diff;
        s++;
        t++;
      }
    }
  }
  else
  {
    while (*s && *t
           && toupper ((unsigned char) *s) == toupper ((unsigned char) *t))
    {
      s++;
      t++;
    }

    return(toupper ((unsigned char) *s) - toupper ((unsigned char) *t));
  }
}

--------------------------------------------------------------------------------------

CIFS2001 Seattle
tridge@valinux.com