protected Query getFieldQuery(String field, String queryText) throws ParseException {
//需要用analyzer对文本进行分词
TokenStream source;
try {
source = analyzer.reusableTokenStream(field, new StringReader(queryText));
source.reset();
} catch (IOException e) {
source = analyzer.tokenStream(field, new StringReader(queryText));
}
CachingTokenFilter buffer = new CachingTokenFilter(source);
TermAttribute termAtt = null;
PositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
boolean success = false;
try {
buffer.reset();
success = true;
} catch (IOException e) {
}
//得到TermAttribute和PositionIncrementAttribute,此两项将决定到底产生什么样的Query对象
if (success) {
if (buffer.hasAttribute(TermAttribute.class)) {
termAtt = buffer.getAttribute(TermAttribute.class);
}
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
}
}
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
boolean hasMoreTokens = false;
if (termAtt != null) {
try {
//遍历分词后的所有Token,统计Tokens的个数numTokens,以及positionIncrement的总数,即positionCount。
//当有一次positionIncrement为0的时候,severalTokensAtSamePosition设为true,表示有多个Token处在同一个位置。
hasMoreTokens = buffer.incrementToken();
while (hasMoreTokens) {
numTokens++;
int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
if (positionIncrement != 0) {
positionCount += positionIncrement;
} else {
severalTokensAtSamePosition = true;
}
hasMoreTokens = buffer.incrementToken();
}
} catch (IOException e) {
}
}
try {
//重设buffer,以便生成phrase查询的时候,term和position可以重新遍历。
buffer.reset();
source.close();
}
catch (IOException e) {
}
if (numTokens == 0)
return null;
else if (numTokens == 1) {
//如果分词后只有一个Token,则生成TermQuery
String term = null;
try {
boolean hasNext = buffer.incrementToken();
term = termAtt.term();
} catch (IOException e) {
}
return newTermQuery(new Term(field, term));
} else {
//如果分词后不只有一个Token
if (severalTokensAtSamePosition) {
//如果有多个Token处于同一个位置
if (positionCount == 1) {
//并且处于同一位置的Token还全部处于第一个位置,则生成BooleanQuery,处于同一位置的Token之间是OR的关系
BooleanQuery q = newBooleanQuery(true);
for (int i = 0; i < numTokens; i++) {
String term = null;
try {
boolean hasNext = buffer.incrementToken();
term = termAtt.term();
} catch (IOException e) {
}
Query currentQuery = newTermQuery(new Term(field, term));
q.add(currentQuery, BooleanClause.Occur.SHOULD);
}
return q;
}
else {
//如果有多个Token处于同一位置,但不是第一个位置,则生成MultiPhraseQuery。
//所谓MultiPhraseQuery即其可以包含多个phrase,其又一个ArrayList<Term[]> termArrays,每一项都是一个Term的数组,属于同一个数组的Term表示在同一个位置。它有函数void add(Term[] terms)一次添加一个数组的Term。比如我们要搜索"microsoft app*",其表示多个phrase,"microsoft apple","microsoft application"都算。此时用QueryParser.parse("\"microsoft app*\"")从而生成PhraseQuery是搜不出microsoft apple和microsoft application的,也不能搜出microsoft app,因为*一旦被引号所引,就不算通配符了。所以必须生成MultiPhraseQuery,首先用add(new Term[]{new Term("field", "microsoft")})将microsoft作为一个Term数组添加进去,然后用add(new Term[]{new Term("field", "app"), new Term("field", "apple"), new Term("field", "application")})作为一个Term数组添加进去(算作同一个位置的),则三者都能搜的出来。
MultiPhraseQuery mpq = newMultiPhraseQuery();
mpq.setSlop(phraseSlop);
List<Term> multiTerms = new ArrayList<Term>();
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
} catch (IOException e) {
}
if (positionIncrement > 0 && multiTerms.size() > 0) {
//如果positionIncrement大于零,说明此Term和前一个Term已经不是同一个位置了,所以原来收集在multiTerms中的Term都算作同一个位置,添加到MultiPhraseQuery中作为一项。并清除multiTerms,以便重新收集相同位置的Term。
if (enablePositionIncrements) {
mpq.add(multiTerms.toArray(new Term[0]),position);
} else {
mpq.add(multiTerms.toArray(new Term[0]));
}
multiTerms.clear();
}
//将此Term收集到multiTerms中。
position += positionIncrement;
multiTerms.add(new Term(field, term));
}
//当遍历完所有的Token,同处于最后一个位置的Term已经收集到multiTerms中了,把他们加到MultiPhraseQuery中作为一项。
if (enablePositionIncrements) {
mpq.add(multiTerms.toArray(new Term[0]),position);
} else {
mpq.add(multiTerms.toArray(new Term[0]));
}
return mpq;
}
}
else {
//如果不存在多个Token处于同一个位置的情况,则直接生成PhraseQuery
PhraseQuery pq = newPhraseQuery();
pq.setSlop(phraseSlop);
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
} catch (IOException e) {
}
if (enablePositionIncrements) {
position += positionIncrement;
pq.add(new Term(field, term),position);
} else {
pq.add(new Term(field, term));
}
}
return pq;
}
}
}
|