Skip to content

Commit

Permalink
fix small problems
Browse files Browse the repository at this point in the history
  • Loading branch information
jsksxs360 committed Aug 4, 2021
1 parent 4619c48 commit ebc0f17
Showing 1 changed file with 18 additions and 3 deletions.
21 changes: 18 additions & 3 deletions src/me/xiaosheng/chnlp/seg/Segment.java
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,26 @@ public static List<List<Term>> seg2sentence(String segType, boolean shortest, St
return results;
}

/**
 * Tells whether every character of the given array is a blank character.
 * <p>
 * A character counts as blank when {@link Character#isWhitespace(char)} or
 * {@link Character#isSpaceChar(char)} accepts it. The second test is needed
 * because {@code isWhitespace} deliberately rejects the non-breaking spaces
 * (U+00A0, U+2007, U+202F); spelling it as an API call avoids embedding an
 * invisible character literal in the source.
 *
 * @param string characters to inspect
 * @return {@code true} if the array is empty or holds only blank characters
 */
private static boolean isWhitespace(char[] string) {
    for (char c : string) {
        if (!Character.isWhitespace(c) && !Character.isSpaceChar(c)) {
            return false;
        }
    }
    return true;
}

/**
 * Gets the list of words from a segmentation result.
 * <p>
 * A term is skipped when its word, after {@link String#trim()}, is empty or
 * consists only of characters accepted by {@code isWhitespace}. Kept words
 * are added as they appear in the term (untrimmed), in input order.
 *
 * @param termList segmentation result
 * @return list of words
 */
public static List<String> getWordList(List<Term> termList) {
    List<String> wordList = new ArrayList<String>();
    for (Term term : termList) {
        String trimmed = term.word.trim();
        // trim() only strips characters <= U+0020, so the helper is still
        // needed to catch words made of other blank characters.
        if (trimmed.isEmpty() || isWhitespace(trimmed.toCharArray())) {
            continue;
        }
        wordList.add(term.word);
    }
    return wordList;
}

Expand All @@ -141,8 +152,12 @@ public static List<String> getWordList(List<Term> termList) {
*/
/**
 * Gets the list of nature (part-of-speech) labels from a segmentation result.
 * <p>
 * A term is skipped when its word, after {@link String#trim()}, is empty or
 * consists only of characters accepted by {@code isWhitespace}. For every
 * other term, {@code term.nature.toString()} is added in input order.
 *
 * @param termList segmentation result
 * @return list of nature labels as strings
 */
public static List<String> getNatureList(List<Term> termList) {
    List<String> natureList = new ArrayList<String>();
    for (Term term : termList) {
        String trimmed = term.word.trim();
        // The filter looks at the term's word, not its nature.
        if (trimmed.isEmpty() || isWhitespace(trimmed.toCharArray())) {
            continue;
        }
        natureList.add(term.nature.toString());
    }
    return natureList;
}

Expand All @@ -167,7 +182,7 @@ private static List<String> splitSentence(char[] chars, boolean shortest) {
StringBuilder sb = new StringBuilder();
List<String> sentences = new LinkedList<String>();
for (int i = 0; i < chars.length; i++) {
if (sb.length() == 0 && (Character.isWhitespace(chars[i]) || chars[i] == ' '))
if (sb.length() == 0 && (Character.isWhitespace(chars[i]) || chars[i] == ' ' || chars[i] == ' '))
continue;
sb.append(chars[i]);
switch (chars[i]) {
Expand Down

0 comments on commit ebc0f17

Please sign in to comment.