`
woxiaoe
  • 浏览: 277053 次
  • 性别: Icon_minigender_1
  • 来自: 长沙
社区版块
存档分类
最新评论

Java版 单词统计

阅读更多

实现的功能有:统计全部单词数(不计重复);

单词按频率排序输出。

package exam.b;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;


/**
 * @author 小e
 *
 * 2010-5-27 下午10:08:29
 */
public class TextAnalyse {
	class Word{
		String str;
		int num;
		public Word(String str){
			this.str = str;
			num = 1;
		}
		@Override
		public int hashCode() {
			final int prime = 31;
			int result = 1;
			result = prime * result + getOuterType().hashCode();
			result = prime * result + num;
			result = prime * result + ((str == null) ? 0 : str.hashCode());
			return result;
		}
		@Override
		public boolean equals(Object obj) {
			if (this == obj)
				return true;
			if (obj == null)
				return false;
			if (getClass() != obj.getClass())
				return false;
			Word other = (Word) obj;
			if (!getOuterType().equals(other.getOuterType()))
				return false;
			if (num != other.num)
				return false;
			if (str == null) {
				if (other.str != null)
					return false;
			} else if (!str.equals(other.str))
				return false;
			return true;
		}
		private TextAnalyse getOuterType() {
			return TextAnalyse.this;
		}
		public void increase(){
			synchronized (this) {
				num ++;
			}
		}
		
		@Override
		public String toString() {
			// TODO Auto-generated method stub
			return str + "[" + num + "]";
		}
	}
	private String path;
	private int wordNums;//出现的单词个数
	private Map<String, Word> wordsMap; 
	private List<String> onceWords;
	private List<Word> allWord;
	public TextAnalyse(String path) {
		this.path = path;
		wordsMap = new HashMap<String, Word>();
		allWord = new ArrayList<Word>();
	}
	
	
	//单词的比较器
	Comparator<Word> wordComparator = new Comparator<Word>() {

		@Override
		public int compare(Word w1,Word w2) {
			return w2.num - w1.num;
		}
	};
	/**
	 * 文本分析
	 */
	public void analyse(){
		try {
			RandomAccessFile rf = new RandomAccessFile(path,"rw");
			String str;
			String words[];
			while((str = rf.readLine()) != null){
				words = str.split("\\s+");
				add2Set(words);
			}
			Collections.sort(allWord, wordComparator);//对所有单词按出现次数排序
		} catch (FileNotFoundException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
	}
	/**
	 * 返回单词出现的次数
	 * @return
	 */
	public int getWordsNum(){
		return wordNums;
	}
	
	/**
	 * 得到只出现一次的单词
	 * @return
	 */
	public List<String> getOnceWords(){
		if(onceWords == null){
			onceWords = new ArrayList<String>();
			for(String str : wordsMap.keySet()){
				if(wordsMap.get(str).num == 1){
					onceWords.add(str);
				}
			}
		}
		return onceWords;
	}
	public List<Word> getAllWord(){
		return allWord;
	}
	 
	private void add2Set(String[] words) {
		for(String str : words){
			Word word = new Word(str);
			if(wordsMap.containsKey(str)){
				wordsMap.get(str).increase();//单词个数自增
			}else{
				wordsMap.put(str, word);
				allWord.add(word);
				wordNums ++;
			}
		}
	}
	
	public static void main(String[] args) {
		TextAnalyse ta = new TextAnalyse("words.txt");
		ta.analyse();
		System.out.format("文中共出现单词%d次\n", ta.getWordsNum());
		System.out.println("出现一次的单词");
		for(String word : ta.getOnceWords()){
			System.out.print(word + " ");
		}
		System.out.println("单词按频率从高到底排序");
		for(Word word : ta.allWord){
			System.out.println(word);
		}
	}
	
}

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics