From 2886a914d5a921d32157ec4672e45406e7ce8e5e Mon Sep 17 00:00:00 2001
From: bigml <bigmaliang@gmail.com>
Date: Thu, 8 Sep 2011 17:41:31 +0800
Subject: [PATCH] utf8 - UTF-8 Chinese character indexing and querying work
 (except word segmentation)

---
 .gitignore             | 36 ++++++++++++++++++++++++
 src/.gitignore         |  2 ++
 src/include/.gitignore |  2 ++
 src/mlparse.c          | 64 +++++++++++++++++++++---------------------
 src/queryparse.c       | 24 ++++++++--------
 src/str.c              | 14 ++++-----
 src/test/.gitignore    |  1 +
 tools/.gitignore       |  1 +
 8 files changed, 93 insertions(+), 51 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 src/.gitignore
 create mode 100644 src/include/.gitignore
 create mode 100644 src/test/.gitignore
 create mode 100644 tools/.gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c928c3e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,36 @@
+cscope.files
+cscope.out
+index.*
+.libs/
+.deps/
+.dirstamp
+
+Makefile
+btree
+chash.test
+config.log
+config.status
+hashtime
+lcrand.test
+libtool
+libzet.la
+mime.test
+mlparse.test
+mlparse_wrap.test
+objalloc.test
+poolalloc.test
+psettings_gen
+psettings_gen.test
+setup.py
+
+zet
+zet_cat
+zet_dict
+zet_diff
+zet_file
+zet_impactify
+zet_trec
+
+staticalloc.test
+stem.test
+stop_gen
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 0000000..7f18ac5
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,2 @@
+*.o
+*.lo
diff --git a/src/include/.gitignore b/src/include/.gitignore
new file mode 100644
index 0000000..f611548
--- /dev/null
+++ b/src/include/.gitignore
@@ -0,0 +1,2 @@
+config.h
+stamp-h1
diff --git a/src/mlparse.c b/src/mlparse.c
index 3c91c1a..f42c1c1 100644
--- a/src/mlparse.c
+++ b/src/mlparse.c
@@ -1122,6 +1122,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             word[0] = *pos++;
             st->len = 1;
             goto word_label;
@@ -1136,7 +1137,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             }
             break;
 
-        case ASCII_CASE_EXTENDED:
         default:
             if (strip) {
                 /* ignore junk as input */
@@ -1240,6 +1240,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             PUSH(*pos++, MLPARSE_WORD, STATE_WORD);
             break;
@@ -1260,7 +1261,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             *length = st->len;
             RETURN(MLPARSE_WORD, STATE_TOPLEVEL);
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             if (strip) {
                 /* ignore junk in words */
@@ -1373,6 +1373,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             PUSH(*pos++, MLPARSE_WORD, STATE_PUNC);
             goto word_label;
@@ -1392,7 +1393,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             *length = st->len;
             RETURN(MLPARSE_WORD, STATE_TOPLEVEL);
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             if (strip) {
                 /* ignore junk in words */
@@ -1503,11 +1503,11 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             PUSH(*pos++, MLPARSE_WORD, STATE_ACRONYM);
             goto word_label;
 
-        case ASCII_CASE_EXTENDED:
         default:
             /* anything else, let the word state deal with it */
             goto word_label;
@@ -1562,11 +1562,11 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* acronym ends, push character onto word */
             PUSH(*pos++, MLPARSE_WORD, STATE_ACRONYM_LETTER);  
             goto word_label;
 
-        case ASCII_CASE_EXTENDED:
         default:
             /* anything else, let the punc state deal with it */
             goto punc_label;
@@ -1604,7 +1604,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
     while (tmppos < tmpend) {
         switch (*tmppos) {
         default:
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_SPACE:
             /* can't have space or other junk directly after tag open, 
              * its not a tag */
@@ -1635,6 +1634,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
         case '/': case '?': 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* pretty much anything else qualifies, loosely speaking */
             st->tagbuf[0] = *tmppos;
             st->tagbuflen = 1;
@@ -1649,7 +1649,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
     while (tmppos < tmpend) {
         switch (*tmppos) {
         default:
-        case ASCII_CASE_EXTENDED:
             /* don't accept junk in tag names */
             JUMP(st->next_state, st->flags & ~FLAG_BUFFER, 0);
 
@@ -1687,6 +1686,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
         case '.': case '-': case '_': case ':':
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* pretty much anything else qualifies, loosely speaking */
             if (st->tagbuflen < st->wordlen) {
                 st->tagbuf[st->tagbuflen++] = *tmppos;
@@ -1706,7 +1706,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
     while (tmppos < tmpend) {
         switch (*tmppos) {
         default:
-        case ASCII_CASE_EXTENDED:
             /* don't accept junk in tag names */
             JUMP(st->next_state, st->flags & ~FLAG_BUFFER, 0);
 
@@ -1726,6 +1725,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
         case '.': case '-': case '_': case ':':
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* pretty much anything else qualifies, loosely speaking */
             tmppos++;
             st->count++;
@@ -1813,6 +1813,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             /* fallthrough */
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* add character to tag name */
             PUSH(*pos++, MLPARSE_TAG, STATE_TAG_NAME_CONT);
             break;
@@ -1827,7 +1828,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             CANT_GET_HERE();
 
         default:
-        case ASCII_CASE_EXTENDED:
             /* ignore anything else */
             pos++;
             break;
@@ -1877,6 +1877,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             /* fallthrough */
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* add character to tag name */
             word[0] = *pos++;
             st->len = 1;
@@ -1892,7 +1893,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             pos++;
             goto pval_dquot_label;
 
-        case ASCII_CASE_EXTENDED:
         default:
             /* ignore everything else */
             pos++;
@@ -1968,11 +1968,11 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             /* fallthrough */
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* add character to tag name */
             PUSH(*pos++, MLPARSE_PARAM, STATE_PARAM);
             break;
 
-        case ASCII_CASE_EXTENDED:
         default:
             /* ignore junk */
             pos++;
@@ -2015,12 +2015,12 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             /* fallthrough */
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* start of a different parameter */
             word[0] = *pos++;
             st->len = 1;
             goto param_label;
 
-        case ASCII_CASE_EXTENDED:
         default:
         case ASCII_CASE_SPACE:
             /* ignore */
@@ -2062,12 +2062,12 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* start of a whitespace delimited parameter value */
             word[0] = *pos++;
             st->len = 1;
             goto pval_label;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_SPACE:
         default:
             /* ignore */
@@ -2109,6 +2109,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             PUSH(*pos++, MLPARSE_PARAMVAL, STATE_PVAL);
             break;
@@ -2125,7 +2126,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             st->next_state = STATE_PVAL;
             goto pval_selfend_label;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:   
             /* ignore junk */
             pos++;
@@ -2167,6 +2167,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             PUSH(*pos++, MLPARSE_PARAMVAL, STATE_PVAL_PUNC);
             goto pval_label;
@@ -2183,7 +2184,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             st->next_state = STATE_PVAL_PUNC;
             goto pval_selfend_label;
  
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore junk */
             pos++;
@@ -2234,6 +2234,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             word[0] = *pos++;
             st->len = 1;
@@ -2259,7 +2260,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             }
             break;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             pos++;
@@ -2310,6 +2310,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             word[0] = *tmppos;
             st->len = 1;
@@ -2331,7 +2332,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             }
             break;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             break;
@@ -2379,6 +2379,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             PUSH(*pos++, MLPARSE_PARAMVAL, STATE_PVAL_QUOT_WORD);
             goto pval_quot_word_label;
@@ -2388,7 +2389,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             *length = st->len;
             RETURN(MLPARSE_PARAMVAL, STATE_PVAL_QUOT);
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             pos++;
@@ -2449,6 +2449,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             PUSH(*tmppos, MLPARSE_PARAMVAL, STATE_PVAL_QUOT_WORD_ESC);
             break;
@@ -2458,7 +2459,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             *length = st->len;
             RETURN(MLPARSE_PARAMVAL, STATE_PVAL_QUOT);
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             break;
@@ -2508,6 +2508,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             PUSH(*pos++, MLPARSE_PARAMVAL, STATE_PVAL_QUOT_PUNC);
             goto pval_quot_word_label;
@@ -2517,7 +2518,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             *length = st->len;
             RETURN(MLPARSE_PARAMVAL, STATE_PVAL_QUOT);
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             pos++;
@@ -2572,6 +2572,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             PUSH(*tmppos, MLPARSE_PARAMVAL, STATE_PVAL_QUOT_PUNC_ESC);
             goto pval_quot_word_label;
@@ -2581,7 +2582,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             *length = st->len;
             RETURN(MLPARSE_PARAMVAL, STATE_PVAL_QUOT);
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             break;
@@ -2696,6 +2696,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             word[0] = *pos++;
             st->len = 1;
@@ -2721,7 +2722,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             }
             break;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             pos++;
@@ -2775,6 +2775,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             word[0] = *tmppos;
             st->len = 1;
@@ -2790,7 +2791,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             }
             break;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             break;
@@ -2838,6 +2838,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             PUSH(*pos++, MLPARSE_PARAMVAL, STATE_PVAL_DQUOT_WORD);
             goto pval_dquot_word_label;
@@ -2847,7 +2848,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             *length = st->len;
             RETURN(MLPARSE_PARAMVAL, STATE_PVAL_DQUOT);
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             pos++;
@@ -2900,6 +2900,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             PUSH(*tmppos, MLPARSE_PARAMVAL, STATE_PVAL_DQUOT_WORD_ESC);
             goto pval_dquot_word_label;
@@ -2909,7 +2910,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             *length = st->len;
             RETURN(MLPARSE_PARAMVAL, STATE_PVAL_DQUOT);
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             break;
@@ -2963,11 +2963,11 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             PUSH(*pos++, MLPARSE_PARAMVAL, STATE_PVAL_DQUOT_PUNC);
             goto pval_dquot_word_label;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             pos++;
@@ -3026,11 +3026,11 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto pval */
             PUSH(*tmppos, MLPARSE_PARAMVAL, STATE_PVAL_DQUOT_PUNC_ESC);
             goto pval_dquot_word_label;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             break;
@@ -3125,6 +3125,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
         case '.': case '-': case '_': case ':':
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             st->erefbuf[0] = *tmppos++;
             st->count = 1;
             goto eref_peek_label;
@@ -3135,7 +3136,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             goto eref_num_peek_first_label;
 
         default:
-        case ASCII_CASE_EXTENDED:
             /* shouldn't get anything else */
             JUMP(st->next_state, st->flags & ~FLAG_BUFFER, 0);
         }
@@ -3284,6 +3284,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
         case '.': case '-': case '_': case ':':
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             st->erefbuf[st->count++] = *tmppos++;
             break;
 
@@ -3360,7 +3361,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             }
             /* fallthrough for failed conversions */
 
-        case ASCII_CASE_EXTENDED:
         default:
             /* anything else isn't valid */
             JUMP(st->next_state, st->flags & ~FLAG_BUFFER, 0);
@@ -3581,6 +3581,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             word[0] = *pos++;
             st->len = 1;
@@ -3618,7 +3619,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             }
             break;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             if (strip) {
                 /* ignore junk in words */
@@ -3646,6 +3646,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             PUSH(*pos++, MLPARSE_WORD, STATE_WORD);
             break;
@@ -3677,7 +3678,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             }
             goto ccdata_punc_label;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             if (strip) {
                 /* ignore junk in words */
@@ -3716,6 +3716,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             PUSH(*pos++, MLPARSE_WORD, STATE_PUNC);
             goto ccdata_word_label;
@@ -3726,7 +3727,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length,
             *length = st->len;
             RETURN(MLPARSE_WORD, STATE_TOPLEVEL);
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             if (strip) {
                 /* ignore junk in words */
diff --git a/src/queryparse.c b/src/queryparse.c
index 3f11b17..857b7a6 100644
--- a/src/queryparse.c
+++ b/src/queryparse.c
@@ -153,6 +153,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             if (*len < parser->maxwordlen) {
                 word[(*len)++] = c;
@@ -192,7 +193,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
         case '(':
             parser->warn |= QUERYPARSE_WARN_PARENS_BOOLEAN; /*warn,fallthrough*/
         default:
-        case ASCII_CASE_EXTENDED:
             /* anything else we don't record in the word, but go to inword 
              * anyway (so we know when a string of junk characters occurred) */
             goto inword_label;
@@ -213,6 +213,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             c = ASCII_TOLOWER(c);
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             if (*len < parser->maxwordlen) {
                 word[(*len)++] = c;
@@ -230,7 +231,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             goto punc_label;
 
         case ASCII_CASE_CONTROL:
-        case ASCII_CASE_EXTENDED:
             /* ignore junk characters */
             break;
 
@@ -268,6 +268,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             c = ASCII_TOLOWER(c);
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             if (*len < parser->maxwordlen) {
                 word[(*len)++] = c;
@@ -287,7 +288,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
 
         default:
         case ASCII_CASE_CONTROL:
-        case ASCII_CASE_EXTENDED:
             /* ignore junk characters */
             break;
 
@@ -326,6 +326,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             c = ASCII_TOLOWER(c);
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             if (*len < parser->maxwordlen) {
                 word[(*len)++] = c;
@@ -338,7 +339,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             break;
 
         case ASCII_CASE_CONTROL:
-        case ASCII_CASE_EXTENDED:
             /* ignore */
             break;
 
@@ -380,6 +380,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             c = ASCII_TOLOWER(c);
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             if (*len < parser->maxwordlen) {
                 word[(*len)++] = c;
@@ -392,7 +393,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             goto inword_nostop_label;
 
         case ASCII_CASE_CONTROL:
-        case ASCII_CASE_EXTENDED:
             /* ignore */
             break;
 
@@ -433,6 +433,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             c = ASCII_TOLOWER(c);
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             if (*len < parser->maxwordlen) {
                 word[(*len)++] = c;
@@ -445,7 +446,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             break;
 
         case ASCII_CASE_CONTROL:
-        case ASCII_CASE_EXTENDED:
             /* ignore */
             break;
 
@@ -487,6 +487,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             c = ASCII_TOLOWER(c);
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             if (*len < parser->maxwordlen) {
                 word[(*len)++] = c;
@@ -499,7 +500,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             break;
 
         case ASCII_CASE_CONTROL:
-        case ASCII_CASE_EXTENDED:
             /* ignore */
             break;
 
@@ -556,6 +556,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             if (*len < parser->maxwordlen) {
                 word[(*len)++] = c;
@@ -569,7 +570,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             goto inword_label;
             break;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             goto inword_label;
         
@@ -628,6 +628,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
 
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             if (*len < parser->maxwordlen) {
                 word[(*len)++] = c;
@@ -641,7 +642,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             goto inword_label;
             break;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             goto inword_label;
 
@@ -684,6 +684,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             c = ASCII_TOLOWER(c);
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             if (*len < parser->maxwordlen) {
                 word[(*len)++] = c;
@@ -709,7 +710,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
 
         case ASCII_CASE_CONTROL:
         default:
-        case ASCII_CASE_EXTENDED:
             /* phrase word started */
             goto inphrase_word_label;
             break;
@@ -729,6 +729,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             c = ASCII_TOLOWER(c);
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             if (*len < parser->maxwordlen) {
                 word[(*len)++] = c;
@@ -765,7 +766,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             }
             break;
 
-        case ASCII_CASE_EXTENDED:
         case ASCII_CASE_CONTROL:
             /* ignore */
             break;
@@ -808,6 +808,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             c = ASCII_TOLOWER(c);
         case ASCII_CASE_LOWER:
         case ASCII_CASE_DIGIT:
+        case ASCII_CASE_EXTENDED:
             /* push character onto word */
             if (*len < parser->maxwordlen) {
                 word[(*len)++] = c;
@@ -836,7 +837,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser,
             break;
 
         case ASCII_CASE_CONTROL:
-        case ASCII_CASE_EXTENDED:
             /* ignore */
             break;
 
diff --git a/src/str.c b/src/str.c
index 9a4bcc0..8e53a95 100644
--- a/src/str.c
+++ b/src/str.c
@@ -43,7 +43,7 @@ int str_cmp(const char *s1, const char *s2) {
         s2++;
     }
 
-    return (unsigned char) *s1 - (unsigned char) *s2;
+    return (char) *s1 - (char) *s2;
 }
 
 int str_ncmp(const char *s1, const char *s2, size_t size) {
@@ -54,7 +54,7 @@ int str_ncmp(const char *s1, const char *s2, size_t size) {
     }
 
     if (size) {
-        return (unsigned char) *s1 - (unsigned char) *s2;
+        return (char) *s1 - (char) *s2;
     } else {
         return 0;
     }
@@ -82,7 +82,7 @@ int str_nncmp(const char *s1, size_t size1, const char *s2, size_t size2) {
     }
 
     if (len) {
-        return (unsigned char) *s1 - (unsigned char) *s2;
+        return (char) *s1 - (char) *s2;
     } else {
         return def;
     }
@@ -463,8 +463,8 @@ int str_casecmp(const char *s1, const char *s2) {
         us2++;
     }
 
-    return (unsigned char) lookup[*us1 + 128] 
-      - (unsigned char) lookup[*us2 + 128];
+    return (char) lookup[*us1 + 128] 
+      - (char) lookup[*us2 + 128];
 }
 
 int str_ncasecmp(const char *s1, const char *s2, 
@@ -479,8 +479,8 @@ int str_ncasecmp(const char *s1, const char *s2,
     }
 
     if (size) {
-        return (unsigned char) lookup[*us1 + 128] 
-          - (unsigned char) lookup[*us2 + 128];
+        return (char) lookup[*us1 + 128] 
+          - (char) lookup[*us2 + 128];
     } else {
         return 0;
     }
diff --git a/src/test/.gitignore b/src/test/.gitignore
new file mode 100644
index 0000000..af8cc8b
--- /dev/null
+++ b/src/test/.gitignore
@@ -0,0 +1 @@
+*_1
diff --git a/tools/.gitignore b/tools/.gitignore
new file mode 100644
index 0000000..5761abc
--- /dev/null
+++ b/tools/.gitignore
@@ -0,0 +1 @@
+*.o