From 2886a914d5a921d32157ec4672e45406e7ce8e5e Mon Sep 17 00:00:00 2001 From: bigml Date: Thu, 8 Sep 2011 17:41:31 +0800 Subject: [PATCH] utf8 - utf8 chinese character index and query ok(except word divide) --- .gitignore | 36 ++++++++++++++++++++++++ src/.gitignore | 2 ++ src/include/.gitignore | 2 ++ src/mlparse.c | 64 +++++++++++++++++++++--------------------- src/queryparse.c | 24 ++++++++-------- src/str.c | 14 ++++----- src/test/.gitignore | 1 + tools/.gitignore | 1 + 8 files changed, 93 insertions(+), 51 deletions(-) create mode 100644 .gitignore create mode 100644 src/.gitignore create mode 100644 src/include/.gitignore create mode 100644 src/test/.gitignore create mode 100644 tools/.gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c928c3e --- /dev/null +++ b/.gitignore @@ -0,0 +1,36 @@ +cscope.files +cscope.out +index.* +.libs/ +.deps/ +.dirstamp + +Makefile +btree +chash.test +config.log +config.status +hashtime +lcrand.test +libtool +libzet.la +mime.test +mlparse.test +mlparse_wrap.test +objalloc.test +poolalloc.test +psettings_gen +psettings_gen.test +setup.py + +zet +zet_cat +zet_dict +zet_diff +zet_file +zet_impactify +zet_trec + +staticalloc.test +stem.test +stop_gen diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..7f18ac5 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,2 @@ +*.o +*.lo diff --git a/src/include/.gitignore b/src/include/.gitignore new file mode 100644 index 0000000..f611548 --- /dev/null +++ b/src/include/.gitignore @@ -0,0 +1,2 @@ +config.h +stamp-h1 diff --git a/src/mlparse.c b/src/mlparse.c index 3c91c1a..f42c1c1 100644 --- a/src/mlparse.c +++ b/src/mlparse.c @@ -1122,6 +1122,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: word[0] = *pos++; st->len = 1; goto word_label; @@ -1136,7 +1137,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, } break; - case ASCII_CASE_EXTENDED: default: if (strip) { /* ignore junk as input */ @@ -1240,6 +1240,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ PUSH(*pos++, MLPARSE_WORD, STATE_WORD); break; @@ -1260,7 +1261,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, *length = st->len; RETURN(MLPARSE_WORD, STATE_TOPLEVEL); - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: if (strip) { /* ignore junk in words */ @@ -1373,6 +1373,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ PUSH(*pos++, MLPARSE_WORD, STATE_PUNC); goto word_label; @@ -1392,7 +1393,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, *length = st->len; RETURN(MLPARSE_WORD, STATE_TOPLEVEL); - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: if (strip) { /* ignore junk in words */ @@ -1503,11 +1503,11 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ PUSH(*pos++, MLPARSE_WORD, STATE_ACRONYM); goto word_label; - case ASCII_CASE_EXTENDED: default: /* anything else, let the word state deal with it */ goto word_label; @@ -1562,11 +1562,11 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* acronym ends, push character onto word */ PUSH(*pos++, MLPARSE_WORD, STATE_ACRONYM_LETTER); goto word_label; - case ASCII_CASE_EXTENDED: default: /* anything else, let the punc state deal with it */ goto punc_label; @@ -1604,7 +1604,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, while (tmppos < tmpend) { switch (*tmppos) { default: - case ASCII_CASE_EXTENDED: case ASCII_CASE_SPACE: /* can't have space or other junk directly after tag open, * its not a tag */ @@ -1635,6 +1634,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case '/': case '?': case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* pretty much anything else qualifies, loosely speaking */ st->tagbuf[0] = *tmppos; st->tagbuflen = 1; @@ -1649,7 +1649,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, while (tmppos < tmpend) { switch (*tmppos) { default: - case ASCII_CASE_EXTENDED: /* don't accept junk in tag names */ JUMP(st->next_state, st->flags & ~FLAG_BUFFER, 0); @@ -1687,6 +1686,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case '.': case '-': case '_': case ':': case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* pretty much anything else qualifies, loosely speaking */ if (st->tagbuflen < st->wordlen) { st->tagbuf[st->tagbuflen++] = *tmppos; @@ -1706,7 +1706,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, while (tmppos < tmpend) { switch (*tmppos) { default: - case ASCII_CASE_EXTENDED: /* don't accept junk in tag names */ JUMP(st->next_state, st->flags & ~FLAG_BUFFER, 0); @@ -1726,6 +1725,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case '.': case '-': case '_': case ':': case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* pretty much anything else qualifies, loosely speaking */ tmppos++; st->count++; @@ -1813,6 +1813,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, /* fallthrough */ case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* add character to tag name */ PUSH(*pos++, MLPARSE_TAG, STATE_TAG_NAME_CONT); break; @@ -1827,7 +1828,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, CANT_GET_HERE(); default: - case ASCII_CASE_EXTENDED: /* ignore anything else */ pos++; break; @@ -1877,6 +1877,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, /* fallthrough */ case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* add character to tag name */ word[0] = *pos++; st->len = 1; @@ -1892,7 +1893,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, pos++; goto pval_dquot_label; - case ASCII_CASE_EXTENDED: default: /* ignore everything else */ pos++; @@ -1968,11 +1968,11 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, /* fallthrough */ case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* add character to tag name */ PUSH(*pos++, MLPARSE_PARAM, STATE_PARAM); break; - case ASCII_CASE_EXTENDED: default: /* ignore junk */ pos++; @@ -2015,12 +2015,12 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, /* fallthrough */ case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* start of a different parameter */ word[0] = *pos++; st->len = 1; goto param_label; - case ASCII_CASE_EXTENDED: default: case ASCII_CASE_SPACE: /* ignore */ @@ -2062,12 +2062,12 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* start of a whitespace delimited parameter value */ word[0] = *pos++; st->len = 1; goto pval_label; - case ASCII_CASE_EXTENDED: case ASCII_CASE_SPACE: default: /* ignore */ @@ -2109,6 +2109,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ PUSH(*pos++, MLPARSE_PARAMVAL, STATE_PVAL); break; @@ -2125,7 +2126,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, st->next_state = STATE_PVAL; goto pval_selfend_label; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore junk */ pos++; @@ -2167,6 +2167,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ PUSH(*pos++, MLPARSE_PARAMVAL, STATE_PVAL_PUNC); goto pval_label; @@ -2183,7 +2184,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, st->next_state = STATE_PVAL_PUNC; goto pval_selfend_label; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore junk */ pos++; @@ -2234,6 +2234,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ word[0] = *pos++; st->len = 1; @@ -2259,7 +2260,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, } break; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ pos++; @@ -2310,6 +2310,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ word[0] = *tmppos; st->len = 1; @@ -2331,7 +2332,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, } break; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ break; @@ -2379,6 +2379,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ PUSH(*pos++, MLPARSE_PARAMVAL, STATE_PVAL_QUOT_WORD); goto pval_quot_word_label; @@ -2388,7 +2389,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, *length = st->len; RETURN(MLPARSE_PARAMVAL, STATE_PVAL_QUOT); - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ pos++; @@ -2449,6 +2449,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ PUSH(*tmppos, MLPARSE_PARAMVAL, STATE_PVAL_QUOT_WORD_ESC); break; @@ -2458,7 +2459,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, *length = st->len; RETURN(MLPARSE_PARAMVAL, STATE_PVAL_QUOT); - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ break; @@ -2508,6 +2508,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ PUSH(*pos++, MLPARSE_PARAMVAL, STATE_PVAL_QUOT_PUNC); goto pval_quot_word_label; @@ -2517,7 +2518,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, *length = st->len; RETURN(MLPARSE_PARAMVAL, STATE_PVAL_QUOT); - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ pos++; @@ -2572,6 +2572,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ PUSH(*tmppos, MLPARSE_PARAMVAL, STATE_PVAL_QUOT_PUNC_ESC); goto pval_quot_word_label; @@ -2581,7 +2582,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, *length = st->len; RETURN(MLPARSE_PARAMVAL, STATE_PVAL_QUOT); - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ break; @@ -2696,6 +2696,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ word[0] = *pos++; st->len = 1; @@ -2721,7 +2722,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, } break; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ pos++; @@ -2775,6 +2775,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ word[0] = *tmppos; st->len = 1; @@ -2790,7 +2791,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, } break; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ break; @@ -2838,6 +2838,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ PUSH(*pos++, MLPARSE_PARAMVAL, STATE_PVAL_DQUOT_WORD); goto pval_dquot_word_label; @@ -2847,7 +2848,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, *length = st->len; RETURN(MLPARSE_PARAMVAL, STATE_PVAL_DQUOT); - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ pos++; @@ -2900,6 +2900,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ PUSH(*tmppos, MLPARSE_PARAMVAL, STATE_PVAL_DQUOT_WORD_ESC); goto pval_dquot_word_label; @@ -2909,7 +2910,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, *length = st->len; RETURN(MLPARSE_PARAMVAL, STATE_PVAL_DQUOT); - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ break; @@ -2963,11 +2963,11 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ PUSH(*pos++, MLPARSE_PARAMVAL, STATE_PVAL_DQUOT_PUNC); goto pval_dquot_word_label; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ pos++; @@ -3026,11 +3026,11 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto pval */ PUSH(*tmppos, MLPARSE_PARAMVAL, STATE_PVAL_DQUOT_PUNC_ESC); goto pval_dquot_word_label; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ break; @@ -3125,6 +3125,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case '.': case '-': case '_': case ':': case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: st->erefbuf[0] = *tmppos++; st->count = 1; goto eref_peek_label; @@ -3135,7 +3136,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, goto eref_num_peek_first_label; default: - case ASCII_CASE_EXTENDED: /* shouldn't get anything else */ JUMP(st->next_state, st->flags & ~FLAG_BUFFER, 0); } @@ -3284,6 +3284,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case '.': case '-': case '_': case ':': case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: st->erefbuf[st->count++] = *tmppos++; break; @@ -3360,7 +3361,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, } /* fallthrough for failed conversions */ - case ASCII_CASE_EXTENDED: default: /* anything else isn't valid */ JUMP(st->next_state, st->flags & ~FLAG_BUFFER, 0); @@ -3581,6 +3581,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ word[0] = *pos++; st->len = 1; @@ -3618,7 +3619,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, } break; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: if (strip) { /* ignore junk in words */ @@ -3646,6 +3646,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ PUSH(*pos++, MLPARSE_WORD, STATE_WORD); break; @@ -3677,7 +3678,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, } goto ccdata_punc_label; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: if (strip) { /* ignore junk in words */ @@ -3716,6 +3716,7 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ PUSH(*pos++, MLPARSE_WORD, STATE_PUNC); goto ccdata_word_label; @@ -3726,7 +3727,6 @@ int mlparse_parse(struct mlparse *parser, char *word, unsigned int *length, *length = st->len; RETURN(MLPARSE_WORD, STATE_TOPLEVEL); - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: if (strip) { /* ignore junk in words */ diff --git a/src/queryparse.c b/src/queryparse.c index 3f11b17..857b7a6 100644 --- a/src/queryparse.c +++ b/src/queryparse.c @@ -153,6 +153,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ if (*len < parser->maxwordlen) { word[(*len)++] = c; @@ -192,7 +193,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, case '(': parser->warn |= QUERYPARSE_WARN_PARENS_BOOLEAN; /*warn,fallthrough*/ default: - case ASCII_CASE_EXTENDED: /* anything else we don't record in the word, but go to inword * anyway (so we know when a string of junk characters occurred) */ goto inword_label; @@ -213,6 +213,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, c = ASCII_TOLOWER(c); case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ if (*len < parser->maxwordlen) { word[(*len)++] = c; @@ -230,7 +231,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, goto punc_label; case ASCII_CASE_CONTROL: - case ASCII_CASE_EXTENDED: /* ignore junk characters */ break; @@ -268,6 +268,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, c = ASCII_TOLOWER(c); case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ if (*len < parser->maxwordlen) { word[(*len)++] = c; @@ -287,7 +288,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, default: case ASCII_CASE_CONTROL: - case ASCII_CASE_EXTENDED: /* ignore junk characters */ break; @@ -326,6 +326,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, c = ASCII_TOLOWER(c); case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ if (*len < parser->maxwordlen) { word[(*len)++] = c; @@ -338,7 +339,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, break; case ASCII_CASE_CONTROL: - case ASCII_CASE_EXTENDED: /* ignore */ break; @@ -380,6 +380,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, c = ASCII_TOLOWER(c); case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ if (*len < parser->maxwordlen) { word[(*len)++] = c; @@ -392,7 +393,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, goto inword_nostop_label; case ASCII_CASE_CONTROL: - case ASCII_CASE_EXTENDED: /* ignore */ break; @@ -433,6 +433,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, c = ASCII_TOLOWER(c); case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ if (*len < parser->maxwordlen) { word[(*len)++] = c; @@ -445,7 +446,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, break; case ASCII_CASE_CONTROL: - case ASCII_CASE_EXTENDED: /* ignore */ break; @@ -487,6 +487,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, c = ASCII_TOLOWER(c); case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ if (*len < parser->maxwordlen) { word[(*len)++] = c; @@ -499,7 +500,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, break; case ASCII_CASE_CONTROL: - case ASCII_CASE_EXTENDED: /* ignore */ break; @@ -556,6 +556,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ if (*len < parser->maxwordlen) { word[(*len)++] = c; @@ -569,7 +570,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, goto inword_label; break; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: goto inword_label; @@ -628,6 +628,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ if (*len < parser->maxwordlen) { word[(*len)++] = c; @@ -641,7 +642,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, goto inword_label; break; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: goto inword_label; @@ -684,6 +684,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, c = ASCII_TOLOWER(c); case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ if (*len < parser->maxwordlen) { word[(*len)++] = c; @@ -709,7 +710,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, case ASCII_CASE_CONTROL: default: - case ASCII_CASE_EXTENDED: /* phrase word started */ goto inphrase_word_label; break; @@ -729,6 +729,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, c = ASCII_TOLOWER(c); case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ if (*len < parser->maxwordlen) { word[(*len)++] = c; @@ -765,7 +766,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, } break; - case ASCII_CASE_EXTENDED: case ASCII_CASE_CONTROL: /* ignore */ break; @@ -808,6 +808,7 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, c = ASCII_TOLOWER(c); case ASCII_CASE_LOWER: case ASCII_CASE_DIGIT: + case ASCII_CASE_EXTENDED: /* push character onto word */ if (*len < parser->maxwordlen) { word[(*len)++] = c; @@ -836,7 +837,6 @@ enum queryparse_ret queryparse_parse(struct queryparse *parser, break; case ASCII_CASE_CONTROL: - case ASCII_CASE_EXTENDED: /* ignore */ break; diff --git a/src/str.c b/src/str.c index 9a4bcc0..8e53a95 100644 --- a/src/str.c +++ b/src/str.c @@ -43,7 +43,7 @@ int str_cmp(const char *s1, const char *s2) { s2++; } - return (unsigned char) *s1 - (unsigned char) *s2; + return (char) *s1 - (char) *s2; } int str_ncmp(const char *s1, const char *s2, size_t size) { @@ -54,7 +54,7 @@ int str_ncmp(const char *s1, const char *s2, size_t size) { } if (size) { - return (unsigned char) *s1 - (unsigned char) *s2; + return (char) *s1 - (char) *s2; } else { return 0; } @@ -82,7 +82,7 @@ int str_nncmp(const char *s1, size_t size1, const char *s2, size_t size2) { } if (len) { - return (unsigned char) *s1 - (unsigned char) *s2; + return (char) *s1 - (char) *s2; } else { return def; } @@ -463,8 +463,8 @@ int str_casecmp(const char *s1, const char *s2) { us2++; } - return (unsigned char) lookup[*us1 + 128] - - (unsigned char) lookup[*us2 + 128]; + return (char) lookup[*us1 + 128] + - (char) lookup[*us2 + 128]; } int str_ncasecmp(const char *s1, const char *s2, @@ -479,8 +479,8 @@ int str_ncasecmp(const char *s1, const char *s2, } if (size) { - return (unsigned char) lookup[*us1 + 128] - - (unsigned char) lookup[*us2 + 128]; + return (char) lookup[*us1 + 128] + - (char) lookup[*us2 + 128]; } else { return 0; } diff --git a/src/test/.gitignore b/src/test/.gitignore new file mode 100644 index 0000000..af8cc8b --- /dev/null +++ b/src/test/.gitignore @@ -0,0 +1 @@ +*_1 diff --git a/tools/.gitignore b/tools/.gitignore new file mode 100644 index 0000000..5761abc --- /dev/null +++ b/tools/.gitignore @@ -0,0 +1 @@ +*.o