*** chasen-2.2.6/lib/tokenizer.c Sat Mar 10 11:02:43 2001 --- chasen-2.2.6-nmz/lib/tokenizer.c Fri Apr 27 14:19:42 2001 *************** *** 53,59 **** KATAKANA, /* KATAKANA LETTER (SMALL) [A-KE] */ SMALL_KATAKANA, /* KATAKANA LETTER SMALL AIUEO, TU, YAYUYO, WA */ FULL_LATIN, /* FULLWIDTH LATIN (CAPITAL|SMALL) LETTER [A-Z] */ ! HALF_LATIN, /* LATIN (CAPITAL|SMALL) LETTER [A-Z] */ JA_OTHER, }; --- 53,61 ---- KATAKANA, /* KATAKANA LETTER (SMALL) [A-KE] */ SMALL_KATAKANA, /* KATAKANA LETTER SMALL AIUEO, TU, YAYUYO, WA */ FULL_LATIN, /* FULLWIDTH LATIN (CAPITAL|SMALL) LETTER [A-Z] */ ! HALF_LATIN, /* HALFWIDTH LATIN (CAPITAL|SMALL) LETTER [A-Z] */ ! HALF_DIGIT, /* HALFWIDTH DIGIT [0-9] */ ! HALF_PUNCT, /* HALFWIDTH PUNCTUATION */ JA_OTHER, }; *************** *** 318,326 **** { if (state == JA_SPACE) { tok->_anno_type[cursor] = -1; ! } else if ((state == HALF_LATIN) || ! (state == FULL_LATIN)) { ! ; /* do nothing */ } else if (((*state0 == KATAKANA) && ((state == PROLONGED) || (state == SMALL_KATAKANA))) || --- 320,331 ---- { if (state == JA_SPACE) { tok->_anno_type[cursor] = -1; ! } else if (state == HALF_LATIN) { ! ; ! } else if (state == HALF_DIGIT) { ! state = HALF_LATIN; ! } else if (state == HALF_PUNCT) { ! state = HALF_LATIN; } else if (((*state0 == KATAKANA) && ((state == PROLONGED) || (state == SMALL_KATAKANA))) || *************** *** 356,362 **** return HALF_LATIN; } else if (is_space(str[0])) { return JA_SPACE; ! } } else if (mblen == 2) { if ((str[0] == 0xa1) && (str[1] == 0xbc)) { return PROLONGED; --- 361,371 ---- return HALF_LATIN; } else if (is_space(str[0])) { return JA_SPACE; ! } else if (isdigit(str[0])) { ! return HALF_DIGIT; ! } else if (ispunct(str[0])) { ! return HALF_PUNCT; ! } } else if (mblen == 2) { if ((str[0] == 0xa1) && (str[1] == 0xbc)) { return PROLONGED; *************** *** 388,394 **** return HALF_LATIN; } else if (is_space(str[0])) { return JA_SPACE; ! } } else if (mblen == 3) { if ((str[0] == 0xe3) && (str[1] == 0x83) && (str[2] == 0xbc)) { return PROLONGED; --- 397,407 ---- return HALF_LATIN; } else if (is_space(str[0])) { return JA_SPACE; ! } else if (isdigit(str[0])) { ! return HALF_DIGIT; ! } else if (ispunct(str[0])) { ! return HALF_PUNCT; ! } } else if (mblen == 3) { if ((str[0] == 0xe3) && (str[1] == 0x83) && (str[2] == 0xbc)) { return PROLONGED;