`

Convert Chinese to Pinyin

    博客分类:
  • J2EE
 
阅读更多
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;

/**
 * Converts Chinese characters to their Pinyin spelling.
 *
 * <p>The lookup dictionary is built once, at construction time, from three
 * GBK-encoded class path resources located next to this class:
 * <ul>
 *   <li>{@code GBIndex.txt} - the supported Chinese characters;</li>
 *   <li>{@code PinYin.txt} - a whitespace separated table of Pinyin syllables;</li>
 *   <li>{@code PinYinIndex.txt} - one three-digit octal number per character of
 *       {@code GBIndex.txt}, giving that character's position in the syllable
 *       table.</li>
 * </ul>
 *
 * <p>Line breaks inside the two index resources are ignored, so the files may
 * be wrapped over several lines.
 *
 * @author Administrator
 */
public class GBKPinyin {
	/** Maps a Chinese character to its Pinyin syllable. Filled once by {@link #initDB()}. */
	private final Map<Character, String> dictionary = new HashMap<Character, String>();
	/** Lazily created shared instance, guarded by the class lock in {@link #getInstance()}. */
	private static GBKPinyin instance;

	/**
	 * Creates a converter and loads its dictionary from the class path.
	 *
	 * @throws IllegalStateException if a dictionary resource is missing or cannot be read
	 */
	public GBKPinyin() {
		initDB(); // build the dictionary eagerly so lookups never touch I/O
	}

	/**
	 * Returns the application wide shared instance, creating it on first use.
	 *
	 * <p>The method is synchronized so that concurrent first callers cannot
	 * each build (and publish) their own instance.
	 *
	 * @return the shared converter, never {@code null}
	 * @throws IllegalStateException if the dictionary resources cannot be loaded
	 */
	public static synchronized GBKPinyin getInstance() {
		if (instance == null) {
			instance = new GBKPinyin();
		}
		return instance;
	}

	/**
	 * Builds the character-to-Pinyin dictionary from the three resources.
	 *
	 * <p>Only characters that have a complete three-digit index entry are
	 * mapped; a trailing partial entry or surplus characters are skipped.
	 */
	private void initDB() {
		// loadStreamToString re-appends '\n' after every line; those line breaks
		// are not data and would otherwise shift the index alignment.
		String gbIndex = stripLineBreaks(loadStreamToString(loadResource("GBIndex.txt")));
		String[] pinyinTable = loadStreamToString(loadResource("PinYin.txt")).split("\\s+");
		// every 3 characters form one octal table position
		String pinyinIndex = stripLineBreaks(loadStreamToString(loadResource("PinYinIndex.txt")));

		int entries = Math.min(gbIndex.length(), pinyinIndex.length() / 3);
		for (int i = 0; i < entries; i++) {
			String octal = pinyinIndex.substring(i * 3, i * 3 + 3);
			int tablePosition = Integer.parseInt(octal, 8);
			dictionary.put(gbIndex.charAt(i), pinyinTable[tablePosition]);
		}
	}

	/**
	 * Removes the line terminators from the given text.
	 *
	 * @param text text as returned by {@link #loadStreamToString(InputStream)}
	 * @return the text without {@code '\n'} and {@code '\r'} characters
	 */
	private static String stripLineBreaks(String text) {
		return text.replace("\n", "").replace("\r", "");
	}

	/**
	 * Opens a resource from the class path, relative to this class.
	 *
	 * @param path resource name
	 * @return an open stream on the resource, never {@code null}
	 * @throws IllegalStateException if the resource does not exist
	 */
	private InputStream loadResource(String path) {
		InputStream in = GBKPinyin.class.getResourceAsStream(path);
		if (in == null) {
			throw new IllegalStateException("Pinyin resource not found on class path: " + path);
		}
		return in;
	}

	/**
	 * Reads the whole stream as GBK encoded text and closes it.
	 *
	 * @param is the stream to read; it is closed before this method returns
	 * @return the stream content, every line terminated by {@code '\n'}
	 * @throws IllegalStateException if the stream is {@code null} or cannot be read
	 */
	private String loadStreamToString(InputStream is) {
		if (is == null) {
			throw new IllegalStateException("Pinyin resource stream is null");
		}
		StringBuilder sb = new StringBuilder();
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new InputStreamReader(is, "GBK"));
			String line;
			while ((line = reader.readLine()) != null) {
				sb.append(line).append('\n');
			}
		} catch (IOException e) {
			// A partially loaded dictionary would give silently wrong results.
			throw new IllegalStateException("Failed to read GBK encoded Pinyin resource", e);
		} finally {
			try {
				if (reader != null) {
					reader.close(); // also closes the wrapped stream
				} else {
					is.close();
				}
			} catch (IOException ignored) {
				// The content has already been read (or a read failure is
				// already propagating); a failing close adds nothing.
			}
		}
		return sb.toString();
	}

	/**
	 * Converts a single Chinese character to Pinyin (the original author notes
	 * that the conversion follows the "travel sky" rules).
	 *
	 * @param character the character to convert
	 * @return the Pinyin syllable, or {@code null} if the character is not in the dictionary
	 */
	public String getPinyin(char character) {
		return dictionary.get(character);
	}

	/**
	 * Converts a Chinese string to a Pinyin string.
	 *
	 * <p>Characters that are not in the dictionary are copied to the result
	 * unchanged. The result is trimmed of leading and trailing whitespace.
	 *
	 * @param word the text to convert; must not be {@code null}
	 * @param seprator text inserted between the converted characters;
	 *        {@code null} or the empty string means no separator
	 * @return the converted text
	 */
	public String getPinyin(String word, String seprator) {
		boolean useSeparator = seprator != null && !"".equals(seprator);
		int last = word.length() - 1;
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i <= last; i++) {
			char current = word.charAt(i);
			String converted = getPinyin(current);
			if (converted != null) {
				sb.append(converted);
			} else {
				// no translation known: keep the character as it is
				sb.append(current);
			}
			if (useSeparator && i < last) {
				sb.append(seprator);
			}
		}
		return sb.toString().trim();
	}

	/**
	 * Converts a Chinese string to a Pinyin string without any separator.
	 *
	 * @param word the text to convert; must not be {@code null}
	 * @return the converted text
	 */
	public String getPinyin(String word) {
		return getPinyin(word, "");
	}
}


使用:
GBKPinyin.getInstance().getPinyin(memberName).toUpperCase();
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics