merge

6d3f944d · bar@mysql.com · 97bd09e1 · 5085f368 · 6d3f944d · 6d3f944d
Commit 6d3f944d authored Mar 29, 2005 by bar@mysql.com
5 changed files
--- a/mysys/charset-def.c
+++ b/mysys/charset-def.c
@@ -62,6 +62,9 @@ extern CHARSET_INFO my_charset_utf8_slovak_uca_ci;
 extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci;
 extern CHARSET_INFO my_charset_utf8_roman_uca_ci;
 extern CHARSET_INFO my_charset_utf8_persian_uca_ci;
+#ifdef HAVE_CYBOZU_COLLATION
+extern CHARSET_INFO my_charset_utf8_general_cs;
+#endif
 #endif

 #endif /* HAVE_UCA_COLLATIONS */
@@ -156,6 +159,9 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
 #ifdef HAVE_CHARSET_utf8
  add_compiled_collation(&my_charset_utf8_general_ci);
  add_compiled_collation(&my_charset_utf8_bin);
+#ifdef HAVE_CYBOZU_COLLATION
+  add_compiled_collation(&my_charset_utf8_general_cs);
+#endif
 #ifdef HAVE_UCA_COLLATIONS
  add_compiled_collation(&my_charset_utf8_general_uca_ci);
  add_compiled_collation(&my_charset_utf8_icelandic_uca_ci);

--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -4075,8 +4075,6 @@ errorconn:
 	      NullS);
      sql_perror(buff);
    }
-    my_security_attr_free(sa_event);
-    my_security_attr_free(sa_mapping);
    if (handle_client_file_map) 
      CloseHandle(handle_client_file_map);
    if (handle_client_map)

--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6300,11 +6300,7 @@ uint my_well_formed_len_big5(CHARSET_INFO *cs __attribute__((unused)),
  const char *emb= e - 1; /* Last possible end of an MB character */
  while (pos && b < e)
  {
-    /*
-      Cast to int8 for extra safety. "char" can be unsigned
-      by default on some platforms.
-    */
-    if (((int8)b[0]) >= 0)
+    if ((uchar) b[0] < 128)
    {
      /* Single byte ascii character */
      b++;

--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4591,12 +4591,7 @@ uint my_well_formed_len_sjis(CHARSET_INFO *cs __attribute__((unused)),
  const char *b0= b;
  while (pos && b < e)
  {
-    /*
-      Cast to int8 for extra safety.
-      "char" can be unsigned by default
-      on some platforms.
-    */
-    if (((int8)b[0]) >= 0)
+    if ((uchar) b[0] < 128)
    {
      /* Single byte ascii character */
      b++;

--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2148,12 +2148,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
  {
    my_wc_t s_wc,t_wc;
    
-    /*
-      Cast to int8 for extra safety.
-      char can be unsigned by default
-      on some platforms.
-    */
-    if (((int8)s[0]) >= 0)
+    if ((uchar) s[0] < 128)
    {
      /* 
        s[0] is between 0 and 127.
@@ -2200,7 +2195,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
    
    /* Do the same for the second string */
    
-    if (((int8)t[0]) >= 0)
+    if ((uchar) t[0] < 128)
    {
      /* Convert single byte character into weight */
      t_wc= plane00[(uchar) t[0]].tolower;
@@ -2410,6 +2405,172 @@ CHARSET_INFO my_charset_utf8_bin=
    &my_collation_mb_bin_handler
 };

+#ifdef HAVE_CYBOZU_COLLATION
+
+/*
+ * These functions basically do the same as their originals, except
+ * that they return 0 only when the two compared Unicode strings are
+ * strictly identical in a case-sensitive way.  See the "save_diff"
+ * local variable for what they actually do.
+ */
+
+static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, 
+                                const uchar *s, uint slen,
+                                const uchar *t, uint tlen,
+                                my_bool t_is_prefix)
+{ /* Compare s and t by "sort" weight; raw code points break ties */
+  int s_res,t_res;
+  my_wc_t s_wc,t_wc;
+  const uchar *se=s+slen;
+  const uchar *te=t+tlen;
+  int save_diff = 0;  /* first non-zero raw code point difference seen */
+  int diff;
+
+  while ( s < se && t < te )
+  {
+    int plane;
+    s_res=my_utf8_uni(cs,&s_wc, s, se);
+    t_res=my_utf8_uni(cs,&t_wc, t, te);
+    
+    if ( s_res <= 0 || t_res <= 0 )

+    {
+      /* Incorrect string, compare by char value */
+      return ((int)s[0]-(int)t[0]); 
+    }
+    
+    if ( save_diff == 0 )
+    {
+      save_diff = ((int)s_wc) - ((int)t_wc);  /* taken before .sort mapping */
+    }
+    plane=(s_wc>>8) & 0xFF;
+    s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
+    plane=(t_wc>>8) & 0xFF;
+    t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
+    if ( s_wc != t_wc )
+    {
+      return  ((int) s_wc) - ((int) t_wc);  /* weights differ: decided */
+    }
+    
+    s+=s_res;
+    t+=t_res;
+  }
+  diff = ( (se-s) - (te-t) );  /* difference in unconsumed byte counts */
+  return t_is_prefix ? t-te : ((diff == 0) ? save_diff : diff);
+} /* NOTE(review): t_is_prefix path ignores save_diff -- confirm intended */
+
+/*
+  Compare s and t by "sort" weight, treating the shorter string as if it
+  were padded with trailing spaces.  If all weights match, the result is
+  the first raw code point difference (save_diff), which is 0 if none.
+*/
+static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, 
+                                  const uchar *s, uint slen,
+                                  const uchar *t, uint tlen)
+{
+  int s_res,t_res;
+  my_wc_t s_wc,t_wc;
+  const uchar *se= s+slen;
+  const uchar *te= t+tlen;
+  int save_diff = 0;  /* first non-zero raw code point difference seen */
+  
+  while ( s < se && t < te )
+  {
+    int plane;
+    s_res=my_utf8_uni(cs,&s_wc, s, se);
+    t_res=my_utf8_uni(cs,&t_wc, t, te);
+    
+    if ( s_res <= 0 || t_res <= 0 )
+    {
+      /* Incorrect string, compare by char value */
+      return ((int)s[0]-(int)t[0]); 
+    }
+    
+    if ( save_diff == 0 )
+    {
+      save_diff = ((int)s_wc) - ((int)t_wc);  /* taken before .sort mapping */
+    }
+    plane=(s_wc>>8) & 0xFF;
+    s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
+    plane=(t_wc>>8) & 0xFF;
+    t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
+    if ( s_wc != t_wc )
+    {
+      return  ((int) s_wc) - ((int) t_wc);
+    }
+    
+    s+=s_res;
+    t+=t_res;
+  }
+  
+  slen= se-s;
+  tlen= te-t;
+  
+  if (slen != tlen)
+  {
+    int swap= 1;  /* +1: the tail belongs to s; -1: it belongs to t */
+    if (slen < tlen)
+    {
+      slen= tlen;
+      s= t;
+      se= te;
+      swap= -1;
+    }
+    /*
+      Only the longer string's tail is left.  Skip trailing spaces; the
+      first non-space byte decides the order (every byte of a multibyte
+      UTF-8 sequence is > ' ').  Return +/-1: XOR with -1 gave 0 for 0x1F.
+    */
+    for ( ; s < se; s++)
+    {
+      if (*s != ' ')
+        return (*s < ' ') ? -swap : swap;
+    }
+  }
+  return save_diff;
+}
+
+static MY_COLLATION_HANDLER my_collation_cs_handler =
+{ /* Handler table used by my_charset_utf8_general_cs */
+    NULL,		/* init */
+    my_strnncoll_utf8_cs,	/* _cs comparator defined in this file */
+    my_strnncollsp_utf8_cs,	/* _cs comparator that skips trailing spaces */
+    my_strnxfrm_utf8,
+    my_like_range_simple,
+    my_wildcmp_mb,
+    my_strcasecmp_utf8,
+    my_instr_mb,
+    my_hash_sort_utf8
+};
+
+CHARSET_INFO my_charset_utf8_general_cs=
+{ /* "utf8_general_cs": utf8 charset handler + my_collation_cs_handler */
+    254,0,0,		/* number       */
+    MY_CS_COMPILED|MY_CS_UNICODE,	/* state  */
+    "utf8",		/* cs name      */
+    "utf8_general_cs",	/* name         */
+    "",			/* comment      */
+    NULL,		/* tailoring    */
+    ctype_utf8,		/* ctype        */
+    to_lower_utf8,	/* to_lower     */
+    to_upper_utf8,	/* to_upper     */
+    to_upper_utf8,	/* sort_order   */
+    NULL,		/* contractions */
+    NULL,		/* sort_order_big*/
+    NULL,		/* tab_to_uni   */
+    NULL,		/* tab_from_uni */
+    NULL,		/* state_map    */
+    NULL,		/* ident_map    */
+    1,			/* strxfrm_multiply */
+    1,			/* mbminlen     */
+    3,			/* mbmaxlen     */
+    0,			/* min_sort_char */
+    255,		/* max_sort_char */
+    &my_charset_utf8_handler,
+    &my_collation_cs_handler
+};
+#endif	/* HAVE_CYBOZU_COLLATION (Cybozu Hack) */
+

 #ifdef MY_TEST_UTF8
 #include <stdio.h>