dlls/kernel32/locale.c |   2 +-
 include/wine/unicode.h |   2 +-
 libs/wine/sortkey.c    | 365 +++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 325 insertions(+), 44 deletions(-)

diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c
index 253032d..9b2b99f 100644
--- a/dlls/kernel32/locale.c
+++ b/dlls/kernel32/locale.c
@@ -3376,7 +3376,7 @@ INT WINAPI CompareStringEx(LPCWSTR locale, DWORD flags, LPCWSTR str1, INT len1,
     if (len1 < 0) len1 = strlenW(str1);
     if (len2 < 0) len2 = strlenW(str2);
 
-    ret = wine_compare_string(flags, str1, len1, str2, len2);
+    ret = wine_compare_string(0, flags, str1, len1, str2, len2); /* LCID 0: the locale name is not mapped to an LCID here */
 
     if (ret) /* need to translate result */
         return (ret < 0) ? CSTR_LESS_THAN : CSTR_GREATER_THAN;
diff --git a/include/wine/unicode.h b/include/wine/unicode.h
index 35c6166..34e660e 100644
--- a/include/wine/unicode.h
+++ b/include/wine/unicode.h
@@ -97,7 +97,7 @@ extern int wine_cpsymbol_wcstombs( const WCHAR *src, int srclen, char *dst, int
 extern int wine_utf8_mbstowcs( int flags, const char *src, int srclen, WCHAR *dst, int dstlen );
 extern int wine_utf8_wcstombs( int flags, const WCHAR *src, int srclen, char *dst, int dstlen );
 
-extern int wine_compare_string( int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2 );
+extern int wine_compare_string( LCID lcid, int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2 );
 extern int wine_get_sortkey( int flags, const WCHAR *src, int srclen, char *dst, int dstlen );
 extern int wine_fold_string( int flags, const WCHAR *src, int srclen , WCHAR *dst, int dstlen );
 
diff --git a/libs/wine/sortkey.c b/libs/wine/sortkey.c
index 7280501..a160c24 100644
--- a/libs/wine/sortkey.c
+++ b/libs/wine/sortkey.c
@@ -18,6 +18,9 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  */
 #include "wine/unicode.h"
+#include <ctype.h>
+#include "wine/debug.h"
+WINE_DEFAULT_DEBUG_CHANNEL(string);
 
 extern int get_decomposition(WCHAR src, WCHAR *dst, unsigned int dstlen);
 extern const unsigned int collation_table[];
@@ -155,11 +158,183 @@ int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dst
     return key_ptr[3] - dst;
 }
 
-static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1,
+static inline int is_windows_special_character(WCHAR ch) { /* selects chars weighted by get_windows_special_character_weight() */
+    return isascii(ch) && !isalnum(ch); /* ASCII, but rejected by isalnum(): controls, space, punctuation */
+}
+
+static inline int is_ignored_character_in_word_sort(WCHAR ch) { /* chars the compare loops skip when SORT_STRINGSORT is not set */
+    return (ch <= 8 || (ch >= 14 && ch <= 31) || ch == 127 || /* control codes 0..8, 14..31 and 127 (9..13 are kept) */
+            ch == '\'' || ch == '-'); /* apostrophe and hyphen */
+}
+
+static inline int is_hiragana(WCHAR ch) { /* true for U+3040..U+309F (Unicode Hiragana block) */
+    return (ch >= 0x3040 && ch <= 0x309F);
+}
+
+static inline int is_wide_katakana(WCHAR ch) { /* true for U+30A0..U+30FF (Unicode Katakana block, fullwidth forms) */
+    return (ch >= 0x30A0 && ch <= 0x30FF);
+}
+
+static inline int is_half_katakana(WCHAR ch) { /* true for U+FF60..U+FF9F; katakana_hankaku_to_hanzen() indexes its table from 0xFF60 */
+    return (ch >= 0xFF60 && ch <= 0xFF9F); /* NOTE(review): halfwidth forms start at U+FF61; U+FF60 looks like a fullwidth parenthesis - confirm */
+}
+
+static inline int is_wide_kana(WCHAR ch) { /* hiragana or fullwidth katakana */
+    return is_hiragana(ch) || is_wide_katakana(ch);
+}
+
+static inline int is_kana(WCHAR ch) { /* any kana: hiragana, fullwidth katakana or halfwidth katakana */
+    return is_wide_kana(ch) || is_half_katakana(ch);
+}
+
+static inline int is_kanji(WCHAR ch) { /* true for U+4E00..U+9FBF, inside the CJK Unified Ideographs block */
+    return (ch >= 0x4E00 && ch <= 0x9FBF);
+}
+
+static inline int is_ascii_latin(WCHAR ch) { /* true for the ASCII letters a-z and A-Z only */
+    return ((ch >= 'a' && ch <= 'z') ||
+            (ch >= 'A' && ch <= 'Z'));
+}
+
+static inline int is_wide_latin(WCHAR ch) { /* fullwidth A-Z (U+FF21..U+FF3A) or fullwidth a-z (U+FF41..U+FF5A) */
+    return ((ch >= 0xFF21 && ch <= 0xFF3A) ||
+            (ch >= 0xFF41 && ch <= 0xFF5A));
+}
+
+static inline WCHAR wide_latin_to_ascii(WCHAR ch) { /* fullwidth letter -> ASCII letter; any other char is returned unchanged */
+    if (ch >= 0xFF21 && ch <= 0xFF3A) /* fullwidth A-Z, 26 letters ending at U+FF3A */
+        return ch - 0xFF21 + 'A';
+    else if (ch >= 0xFF41 && ch <= 0xFF5A) /* fullwidth a-z, 26 letters ending at U+FF5A */
+        return ch - 0xFF41 + 'a';
+    else
+        return ch;
+}
+
+static inline int is_latin(WCHAR ch) {
+    /* Warning: untested. Treats ch as Latin when its collation unicode weight lies in 0x0A15..0x0C13. */
+    unsigned int ce = collation_table[collation_table[ch >> 8] + (ch & 0xff)]; /* two-level lookup: page offset, then low byte */
+    unsigned int unicode_weight = ce >> 16; /* high 16 bits; kept unsigned so the shift never acts on a negative value */
+    return (unicode_weight >= 0x0A15 && unicode_weight <= 0x0C13);
+}
+
+static inline int is_japanese(WCHAR ch) { /* kana of either width or script, or kanji */
+    return is_kana(ch) || is_kanji(ch);
+}
+
+static inline WCHAR katakana_hankaku_to_hanzen(const WCHAR ** str, int *len) {
+    /* Caller must ensure is_half_katakana(**str). Entry [ch - 0xFF60] is the
+     * fullwidth counterpart of ch, for ch in U+FF60..U+FF9D. */
+    static const WCHAR katakana_map[] = {
+        0x3000, 0x3002, 0x300c, 0x300d, 0x3001, 0x30fb, 0x30f2, 0x30a1,
+        0x30a3, 0x30a5, 0x30a7, 0x30a9, 0x30e3, 0x30e5, 0x30e7, 0x30c3,
+        0x30fc, 0x30a2, 0x30a4, 0x30a6, 0x30a8, 0x30aa, 0x30ab, 0x30ad,
+        0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9, 0x30bb, 0x30bd,
+        0x30bf, 0x30c1, 0x30c4, 0x30c6, 0x30c8, 0x30ca, 0x30cb, 0x30cc,
+        0x30cd, 0x30ce, 0x30cf, 0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de,
+        0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e4, 0x30e6, 0x30e8, 0x30e9,
+        0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ef, 0x30f3
+    };
+    WCHAR ch = **str, mark;
+
+    if (ch >= 0xFF9E) /* a voicing mark on its own has no base to attach to */
+        return '?';
+    ch = katakana_map[ch - 0xFF60];
+    mark = (*len >= 2) ? (*str)[1] : 0;
+
+    /* Fold a following halfwidth (han)dakuten into the base only when that
+     * voiced kana exists; otherwise e.g. NA + dakuten would become NI. */
+    if (mark == 0xFF9E && ch == 0x30A6) {
+        ch = 0x30F4; /* U + dakuten -> VU */
+    } else if (mark == 0xFF9E && ((ch >= 0x30AB && ch <= 0x30C1) ||
+                                  (ch >= 0x30C4 && ch <= 0x30C8) ||
+                                  (ch >= 0x30CF && ch <= 0x30DB))) {
+        ch += 1; /* KA..CHI, TSU..TO, HA..HO: the dakuten form is base + 1 */
+    } else if (mark == 0xFF9F && ch >= 0x30CF && ch <= 0x30DB) {
+        ch += 2; /* HA..HO: the handakuten form is base + 2 */
+    } else {
+        return ch; /* nothing to fold: *str and *len stay untouched */
+    }
+    (*str)++; (*len)--; /* consume the mark that was folded in */
+    return ch;
+}
+
+static inline int only_symbols_remains(const WCHAR * str1, int len1, const WCHAR * str2, int len2) {
+    /* Returns 1 when one string is exhausted and the rest of the other holds
+     * only characters typed C1_PUNCT or C1_SPACE; returns 0 otherwise. */
+    const WCHAR *tail;
+    int tail_len;
+
+    if (len1 != 0 && len2 != 0)
+        return 0; /* both strings still have characters to compare */
+
+    /* Pick the side that may still hold characters (str2 when str1 is done). */
+    if (len1 == 0) {
+        tail = str2;
+        tail_len = len2;
+    } else {
+        tail = str1;
+        tail_len = len1;
+    }
+
+    for (; tail_len > 0; tail++, tail_len--) {
+        if (!(get_char_typeW(*tail) & (C1_PUNCT | C1_SPACE)))
+            return 0;
+    }
+    return 1;
+}
+
+/* Sort weight for an ASCII non-alphanumeric ch; logs an ERR and returns 0 when ch > 127. */
+static inline int get_windows_special_character_weight(LCID lcid, WCHAR ch) {
+    /* This is tested for english locale, but works for many others */
+    static const int basic_weight_table[] = {
+        /*0*/00, /*1*/10, /*2*/20, /*3*/30, /*4*/40,
+        /*5*/50, /*6*/60, /*7*/70, /*8*/80, /*9*/310,
+        /*10*/320, /*11*/330, /*12*/340, /*13*/350, /*14*/90,
+        /*15*/100, /*16*/110, /*17*/120, /*18*/130, /*19*/140,
+        /*20*/150, /*21*/160, /*22*/170, /*23*/180, /*24*/190,
+        /*25*/200, /*26*/210, /*27*/220, /*28*/230, /*29*/240,
+        /*30*/250, /*31*/260, /*32*/300, /*33:!*/360, /*34:"*/370,
+        /*35:#*/380, /*36:$*/390, /*37:%*/400, /*38:&*/410, /*39:'*/280,
+        /*40:(*/420, /*41:)*/430, /*42:**/440, /*43:+*/620, /*44:,*/450,
+        /*45:-*/290, /*46:.*/460, /*47:slash*/470,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* [0-9] */
+        /*58::*/480, /*59:;*/490, /*60:<*/630, /*61:=*/640,
+        /*62:>*/650, /*63:?*/500, /*64:@*/510,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* [A-Z] */
+        00, /*91:[*/520, /*92:\*/530, /*93:]*/540,
+        /*94:^*/550, /*95:_*/560, /*96:`*/570,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* [a-z] */
+        /*123:{*/580, /*124:|*/590, /*125:}*/600, /*126:~*/610, /*127*/270
+    };
+    const int primary_lang = PRIMARYLANGID(LANGIDFROMLCID(lcid)); /* LANG_* ids are values, not bit flags */
+
+    if (ch > 127) {
+        ERR("get_windows_special_character_weight has received a non-ascii char.\n");
+        return 0;
+    }
+
+    /* Some languages differ in how Windows orders symbols; handle them here.
+     * In Japanese and Korean locales the backslash (often displayed as the yen
+     * or won sign) is sorted just after '>' and before [0-9]. */
+    if ((primary_lang == LANG_JAPANESE || primary_lang == LANG_KOREAN) && ch == '\\')
+        return 651;
+
+    return basic_weight_table[ch];
+}
+
+static inline int compare_unicode_weights(LCID lcid, int flags, const WCHAR *str1, int len1,
                                           const WCHAR *str2, int len2)
 {
     unsigned int ce1, ce2;
     int ret;
+    int last_no_ignored1 = 1, last_no_ignored2 = 1;
+
+    while (last_no_ignored1 <= len1 &&
+            is_ignored_character_in_word_sort(str1[len1-last_no_ignored1]))
+        last_no_ignored1++;
+    while (last_no_ignored2 <= len2 &&
+            is_ignored_character_in_word_sort(str2[len2-last_no_ignored2]))
+        last_no_ignored2++;
 
     /* 32-bit collation element table format:
      * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
@@ -191,30 +366,34 @@ static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1
         */
         if (!(flags & SORT_STRINGSORT))
         {
-            if (*str1 == '-' || *str1 == '\'')
-            {
-                if (*str2 != '-' && *str2 != '\'')
-                {
-                    str1++;
-                    len1--;
-                    continue;
-                }
-            }
-            else if (*str2 == '-' || *str2 == '\'')
+            if (len1 > last_no_ignored1 && is_ignored_character_in_word_sort(*str1))
             {
+                str1++;
+                len1--;
+                continue;
+            } else if (len2 > last_no_ignored2 && is_ignored_character_in_word_sort(*str2)) {
                 str2++;
                 len2--;
                 continue;
             }
         }
 
-        ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
-        ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
-
-        if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
-            ret = (ce1 >> 16) - (ce2 >> 16);
-        else
-            ret = *str1 - *str2;
+        if (!(is_windows_special_character(*str1) && 
+            is_windows_special_character(*str2)))
+        {
+            ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
+            ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
+
+            if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
+                ret = (ce1 >> 16) - (ce2 >> 16);
+            else
+                ret = *str1 - *str2;
+        } else {
+            int weight1, weight2;
+            weight1 = get_windows_special_character_weight(lcid, *str1);
+            weight2 = get_windows_special_character_weight(lcid, *str2);
+            ret = weight1 - weight2;
+        }
 
         if (ret) return ret;
 
@@ -233,14 +412,25 @@ static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1
         str2++;
         len2--;
     }
+    /* if NORM_IGNORESYMBOLS is set, "dream," must match "dream" as equal. */
+    if ((flags & NORM_IGNORESYMBOLS) && only_symbols_remains(str1, len1, str2, len2))
+        return 0;
     return len1 - len2;
 }
 
-static inline int compare_diacritic_weights(int flags, const WCHAR *str1, int len1,
-                                            const WCHAR *str2, int len2)
+static inline int compare_case_weights(LCID lcid, int flags, const WCHAR *str1, int len1,
+                                       const WCHAR *str2, int len2)
 {
     unsigned int ce1, ce2;
     int ret;
+    int last_no_ignored1 = 1, last_no_ignored2 = 1;
+
+    while (last_no_ignored1 <= len1 &&
+            is_ignored_character_in_word_sort(str1[len1-last_no_ignored1]))
+        last_no_ignored1++;
+    while (last_no_ignored2 <= len2 &&
+            is_ignored_character_in_word_sort(str2[len2-last_no_ignored2]))
+        last_no_ignored2++;
 
     /* 32-bit collation element table format:
      * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
@@ -267,13 +457,69 @@ static inline int compare_diacritic_weights(int flags, const WCHAR *str1, int le
             if (skip) continue;
         }
 
-        ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
-        ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
+       /* hyphen and apostrophe are treated differently depending on
+        * whether SORT_STRINGSORT specified or not
+        */
+        if (!(flags & SORT_STRINGSORT))
+        {
+            if (len1 > last_no_ignored1 && is_ignored_character_in_word_sort(*str1))
+            {
+                str1++;
+                len1--;
+                continue;
+            } else if (len2 > last_no_ignored2 && is_ignored_character_in_word_sort(*str2)) {
+                str2++;
+                len2--;
+                continue;
+            }
+        }
 
-        if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
-            ret = ((ce1 >> 8) & 0xff) - ((ce2 >> 8) & 0xff);
-        else
-            ret = *str1 - *str2;
+        if (!(is_windows_special_character(*str1) && 
+            is_windows_special_character(*str2)))
+        {
+            int case1, case2;
+            ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
+            ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
+
+            case1 = (ce1 >> 4) &0x0f;
+            case2 = (ce2 >> 4) &0x0f;
+            ret = case1 - case2;
+
+            if (ret) {
+                if (is_kana(*str1) && is_kana(*str2)) {
+                    if (flags & NORM_IGNOREKANATYPE) {
+                        if (flags & NORM_IGNOREWIDTH) {
+                            ret = 0;
+                        } else if (!is_half_katakana(*str1) && !is_half_katakana(*str2)) {
+                            ret = 0;
+                        }
+                    } else if (flags & NORM_IGNOREWIDTH) {
+                        if (!is_hiragana(*str1) && !is_hiragana(*str2)) {
+                            ret = 0;
+                        }
+                    }
+                } else if (is_latin(*str1) && is_latin(*str2)) {
+                    if ((flags & NORM_IGNOREWIDTH) && (flags & NORM_IGNORECASE)) {
+                        ret = 0;
+                    } else if (flags & NORM_IGNOREWIDTH) {
+                        /* Warning: Manual test seems to reveal width differences let a case field
+                         * difference of 1, but I am unsure of this. */
+                        if (ret == 1 || ret == -1) {
+                            ret = 0;
+                        }
+                    } else if (flags & NORM_IGNORECASE) {
+                        ret = 0;
+                    }
+                } else if (flags & NORM_IGNORECASE) {
+                    ret = 0;
+                }
+            }
+        } else {
+            int weight1, weight2;
+            weight1 = get_windows_special_character_weight(lcid, *str1);
+            weight2 = get_windows_special_character_weight(lcid, *str2);
+            ret = weight1 - weight2;
+        }
 
         if (ret) return ret;
 
@@ -292,13 +538,35 @@ static inline int compare_diacritic_weights(int flags, const WCHAR *str1, int le
         str2++;
         len2--;
     }
+    /* if NORM_IGNORESYMBOLS is set, "dream," must match "dream" as equal. */
+    if ((flags & NORM_IGNORESYMBOLS) && only_symbols_remains(str1, len1, str2, len2))
+        return 0;
     return len1 - len2;
 }
 
-static inline int compare_case_weights(int flags, const WCHAR *str1, int len1,
-                                       const WCHAR *str2, int len2)
+static inline WCHAR convert_to_diacritic_comparable(int flags, const WCHAR **str, int *len) { /* folds **str per the NORM_IGNORE* flags */
+    WCHAR ch;
+    ch = **str;
+    if (flags & NORM_IGNOREWIDTH) { /* map halfwidth katakana and fullwidth latin onto one width */
+        if (is_half_katakana(ch))
+            ch = katakana_hankaku_to_hanzen(str, len); /* may advance *str and decrement *len when it consumes a voicing mark */
+        if (is_wide_latin(ch))
+            ch = wide_latin_to_ascii(ch);
+    }
+    if (flags & NORM_IGNOREKANATYPE) {
+        if (is_wide_katakana(ch))
+            ch -= 0x60; /* convert to hiragana: U+30A0..U+30FF shifted down onto U+3040..U+309F */
+    }
+    if (flags & NORM_IGNORECASE) {
+        if (isalphaW(ch))
+            ch = tolowerW(ch); /* letters are compared in lower case */
+    }
+    return ch; /* the caller compares the folded code units directly */
+}
+
+static inline int compare_diacritic_weights(LCID lcid, int flags, const WCHAR *str1, int len1,
+                                            const WCHAR *str2, int len2)
 {
-    unsigned int ce1, ce2;
     int ret;
 
     /* 32-bit collation element table format:
@@ -326,13 +594,21 @@ static inline int compare_case_weights(int flags, const WCHAR *str1, int len1,
             if (skip) continue;
         }
 
-        ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
-        ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
-
-        if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
-            ret = ((ce1 >> 4) & 0x0f) - ((ce2 >> 4) & 0x0f);
-        else
-            ret = *str1 - *str2;
+        if (!(is_windows_special_character(*str1) && 
+            is_windows_special_character(*str2)))
+        {
+            WCHAR ch1, ch2;
+            ch1 = convert_to_diacritic_comparable(flags, &str1, &len1);
+            ch2 = convert_to_diacritic_comparable(flags, &str2, &len2);
+
+            /* To put it simply, Wine collation table does not work with diacritics */
+            ret = ch1 - ch2;
+        } else {
+            int weight1, weight2;
+            weight1 = get_windows_special_character_weight(lcid, *str1);
+            weight2 = get_windows_special_character_weight(lcid, *str2);
+            ret = weight1 - weight2;
+        }
 
         if (ret) return ret;
 
@@ -351,21 +627,26 @@ static inline int compare_case_weights(int flags, const WCHAR *str1, int len1,
         str2++;
         len2--;
     }
+    /* if NORM_IGNORESYMBOLS is set, "dream," must match "dream" as equal. */
+    if ((flags & NORM_IGNORESYMBOLS) && only_symbols_remains(str1, len1, str2, len2))
+        return 0;
     return len1 - len2;
 }
 
-int wine_compare_string(int flags, const WCHAR *str1, int len1,
+int wine_compare_string(LCID lcid, int flags, const WCHAR *str1, int len1,
                         const WCHAR *str2, int len2)
 {
     int ret;
 
-    ret = compare_unicode_weights(flags, str1, len1, str2, len2);
+    ret = compare_unicode_weights(lcid, flags, str1, len1, str2, len2); /* first pass; the later passes only run on a tie */
     if (!ret)
     {
-        if (!(flags & NORM_IGNORENONSPACE))
-            ret = compare_diacritic_weights(flags, str1, len1, str2, len2);
-        if (!ret && !(flags & NORM_IGNORECASE))
-            ret = compare_case_weights(flags, str1, len1, str2, len2);
+        if ((flags & (NORM_IGNORECASE | NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH)) != /* skipped only when all three flags are set */
+                (NORM_IGNORECASE | NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH))
+            ret = compare_case_weights(lcid, flags, str1, len1, str2, len2); /* this pass handles case, kana type and width */
+        /* the diacritic pass runs last, and only if the strings are still tied */
+        if (!ret && !(flags & NORM_IGNORENONSPACE))
+            ret = compare_diacritic_weights(lcid, flags, str1, len1, str2, len2);
     }
     return ret;
 }