`
xiang37
  • 浏览: 414203 次
  • 性别: Icon_minigender_1
  • 来自: 南京
社区版块
存档分类
最新评论

改进后的归并排序,对大文件归并排序

 
阅读更多

针对大文件,一次无法全部读入内存,可以采用将内容保存到文件的方式,进行归并;

可以改进之处,即分割文件,多线程,多机器处理;可以大大提高效率

 

package com.xiva.demo.sort;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.Arrays;
import java.util.LinkedList;

import org.apache.commons.io.IOUtils;

public class SortPractice<E extends Comparable<E>>
{
    
    private static int fileTempNum = 0;
    
    /**
     * 归并排序
     * @param array
     * @param low
     * @param high [参数说明]
     * 
     * @return void [返回类型说明]
     * @exception throws [违例类型] [违例说明]
     * @see [类、类#方法、类#成员]
     */
    @SuppressWarnings("unchecked")
    public void merge(E[] array, int low, int high)
    {
        int mid = (high + low) / 2;
        int s1 = low;
        int s2 = mid + 1;
        
        Object[] tempArr = new Object[high - low + 1];
        int tempIdx = 0;
        while (s1 <= mid && s2 <= high)
        {
            if (array[s1].compareTo(array[s2]) <= 0)
            {
                tempArr[tempIdx] = array[s1++];
                tempIdx++;
            }
            else
            {
                tempArr[tempIdx] = array[s2++];
                tempIdx++;
            }
        }
        
        if (s1 <= mid)
        {
            for (int i = s1; i <= mid; i++)
            {
                tempArr[tempIdx] = array[i];
                tempIdx++;
            }
        }
        
        if (s2 <= high)
        {
            for (int i = s2; i <= high; i++)
            {
                tempArr[tempIdx] = array[i];
                tempIdx++;
            }
        }
        
        for (int i = 0; i < tempArr.length; i++)
        {
            array[low + i] = (E)tempArr[i];
        }
    }
    
    public void mergeSort(E[] array, int low, int high)
    {
        
        if (low < high)
        {
            mergeSort(array, low, (high + low) / 2);
            mergeSort(array, (high + low) / 2 + 1, high);
            merge(array, low, high);
        }
    }
    
    public static void testmerge(int maxLineRead) throws IOException
    {
        
        File file = new File("E:\\data\\msisdn.txt");
        LinkedList<String> numList = new LinkedList<String>();
        BufferedReader bReader = new BufferedReader(new FileReader(file));
        
        LineNumberReader lineReader = new LineNumberReader(new FileReader(file));
        int tempFileNum = getLineNumber(lineReader, maxLineRead);
        
        File[] fileArray = new File[tempFileNum];
        
        for (int i = 0; i < tempFileNum; i++)
        {
            File tempFile = new File("E:\\data\\msisdnSort" + i + ".txt");
            fileArray[i] = tempFile;
            
            if (!tempFile.exists())
            {
                tempFile.createNewFile();
            }
        }
        
        String line = bReader.readLine();
        
        long startTime = System.currentTimeMillis();
        int lineIndex = 1;
        int fileIndex = 0;
        while (line != null)
        {
            String trimLine = line.trim();
            if (!trimLine.equals(""))
            {
                numList.add(trimLine);
            }
            line = bReader.readLine();
            lineIndex++;
            if (lineIndex > maxLineRead)
            {
                lineIndex = 1;
                FileOutputStream oStream = new FileOutputStream(
                        fileArray[fileIndex], true);
                outputResult2File(numList, oStream);
                fileIndex++;
                numList = new LinkedList<String>();
            }
        }
        
        if (numList.size() > 0)
        {
            FileOutputStream oStream = new FileOutputStream(
                    fileArray[fileIndex], true);
            outputResult2File(numList, oStream);
        }
        long endTime = System.currentTimeMillis();
        System.out.println("MergeSort Time:" + (endTime - startTime));
        mergeSortUseFile(fileArray);
        long outTime = System.currentTimeMillis();
        System.out.println("MergeFile Time:" + (outTime - endTime));
    }
    
    private static int getLineNumber(LineNumberReader lineReader,
            int maxLineRead) throws IOException
    {
        
        String line = lineReader.readLine();
        
        while (line != null)
        {
            line = lineReader.readLine();
        }
        
        int lineNum = lineReader.getLineNumber();
        int tempFileNum = lineNum / maxLineRead;
        
        if (lineNum % maxLineRead != 0)
        {
            tempFileNum = tempFileNum + 1;
        }
        return tempFileNum;
    }
    
    private static void outputResult2File(LinkedList<String> numList,
            FileOutputStream oStream) throws IOException
    {
        SortPractice<String> sort = new SortPractice<String>();
        
        int size = numList.size();
        String[] array = new String[size];
        
        for (int j = 0; j < size; j++)
        {
            array[j] = numList.removeFirst();
        }
        
        sort.mergeSort(array, 0, size - 1);
        
        for (int i = 0; i < array.length; i++)
        {
            IOUtils.write(array[i], oStream);
            IOUtils.write(System.getProperty("line.separator"), oStream);
        }
    }
    
    public static void mergeSortUseFile(File[] fileArray) throws IOException
    {
        
        int arrLen = fileArray.length;
        if (arrLen < 2)
        {
            return;
        }
        
        int tempFileNum = arrLen / 2;
        
        if (arrLen % 2 != 0)
        {
            tempFileNum = tempFileNum + 1;
        }
        
        File[] tempFileArray = new File[tempFileNum];
        
        for (int i = 0; i < tempFileNum; i++)
        {
            File tempFile = new File("E:\\data\\msisdnMerge" + fileTempNum + i + ".txt");
            tempFileArray[i] = tempFile;
            
            if (!tempFile.exists())
            {
                tempFile.createNewFile();
            }
        }
        
        fileTempNum = fileTempNum + tempFileNum;
        
        int fileIndex = 1;
        while (fileIndex < arrLen)
        {
            mergeFile(fileArray[fileIndex - 1],
                    fileArray[fileIndex],
                    tempFileArray[fileIndex / 2]);
            fileIndex = fileIndex + 2;
        }
        
        if(arrLen%2 != 0)
        {
            tempFileArray[tempFileNum - 1] = fileArray[arrLen - 1];
        }
        
        mergeSortUseFile(tempFileArray);
    }
    
    public static void mergeFile(File file1, File file2, File tempFile)
            throws IOException
    {
        BufferedReader file1Reader = new BufferedReader(new FileReader(file1));
        BufferedReader file2Reader = new BufferedReader(new FileReader(file2));
        FileOutputStream oStream = new FileOutputStream(tempFile, true);
        String obj1 = file1Reader.readLine();
        String obj2 = file2Reader.readLine();
        
        while (obj1 != null && obj2 != null)
        {
            if (obj1.compareTo(obj2) <= 0)
            {
                IOUtils.write(obj1, oStream);
                IOUtils.write(System.getProperty("line.separator"), oStream);
                obj1 = file1Reader.readLine();
            }
            else
            {
                IOUtils.write(obj2, oStream);
                IOUtils.write(System.getProperty("line.separator"), oStream);
                obj2 = file2Reader.readLine();
            }
        }
        
        if (obj1 != null)
        {
            while (obj1 != null)
            {
                IOUtils.write(obj1, oStream);
                IOUtils.write(System.getProperty("line.separator"), oStream);
                obj1 = file1Reader.readLine();
            }
        }
        
        if (obj2 != null)
        {
            while (obj2 != null)
            {
                IOUtils.write(obj2, oStream);
                IOUtils.write(System.getProperty("line.separator"), oStream);
                obj2 = file2Reader.readLine();
            }
        }
        
    }
    
    public static void testArraySort()
    {
        SortPractice<Integer> sort = new SortPractice<Integer>();
        
        Integer num01 = new Integer(12);
        Integer num02 = new Integer(13);
        Integer num03 = new Integer(44);
        Integer num04 = new Integer(3);
        Integer num05 = new Integer(13);
        Integer num06 = new Integer(5);
        Integer num07 = new Integer(7);
        
        Integer[] array = {num01, num02, num03, num04, num05, num06, num07,
                num04, num06, num01};
        
        sort.mergeSort(array, 0, array.length - 1);
        System.out.println(Arrays.toString(array));
        
    }
    
    public static void main(String[] args) throws IOException
    {
        testmerge(100000);
    }
    
}

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics