`

[练习]erlang 生成文本内单词索引

 
阅读更多

不知道标题这么取是否恰当

就是为文本内的单词生成索引

看《改变未来的九大算法》(是本民科 清晰易懂)的第二章 仿照里面的索引 如下:



 文本里内容和图中也是一样的

先把结果打出来 因为我很懒 就不对dict做解析了 直接返回的就是个dict 结果如下:

29> filereader:read("G:\erl_workbench\pattern", "*.txt").
File name:"1.txt"
File name:"2.txt"
File name:"3.txt"
{{dict,3,16,16,8,80,48,
       {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},
       {{[],
         [[3|{"3.txt"}]],
         [],[],[],[],
         [[2|{"2.txt"}]],
         [],[],[],[],
         [[1|{"1.txt"}]],
         [],[],[],[]}}},
 {dict,9,16,16,8,80,48,
       {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},
       {{[["on",{1,4},{2,4}]],
         [["dog",{2,2},{3,6}]],
         [],[],
         [["while",{3,4}]],
         [],[],
         [["cat",{1,2},{3,2}],["sat",{1,3},{3,7}]],
         [],
         [["mat",{1,6},{2,6}]],
         [],[],
         [["the",{1,...},{...}|...],["stood",{...}|...],[[...]|...]],
         [],[],[]}}}}

 

代码如下:

%% @author cc fairjm
%% fair-jm.iteye.com
%% @doc @todo read files and return indexes of the words in the files.


-module(filereader).

-export([read/2,read_info/3]).


%% @doc Build a word index for every file in Dir whose name matches WildCard.
%%
%% The current working directory is switched to Dir, one linked reader
%% process is started per matching file, and their replies are merged.
%%
%% Returns {FileDict, WordDict}:
%%   FileDict maps a file number (counting from 1) to {Filename};
%%   WordDict maps each word to a list of {FileNumber, WordPosition}.
%%
%% Raises error({set_cwd_failed, Dir, Reason}) when Dir cannot be entered.
%% Previously that failure was ignored and the files of whatever directory
%% happened to be current were indexed instead.
read(Dir,WildCard) ->
  %% Trap exits so a crashing reader shows up as a message instead of
  %% killing the caller; the previous setting is restored on the way out.
  OldTrap=process_flag(trap_exit, true),
  try
    case file:set_cwd(Dir) of
      ok -> ok;
      {error,Reason} -> erlang:error({set_cwd_failed,Dir,Reason})
    end,
    Files=filelib:wildcard(WildCard),
    {_Num,FileDict}=lists:foldl(fun readFile/2,{1,dict:new()}, Files),
    WordDict=recinfo(dict:new(),0,length(Files)),
    {FileDict,WordDict}
  after
    process_flag(trap_exit, OldTrap)
  end.

%% Fold step used by read/2: announce the file, start a linked reader for it
%% and record its name under the running file number.
%% The accumulator is {NextFileNumber, FileDict}; the reader reports its
%% result back to the calling process.
readFile(Filename,{Index,FileDict}) ->
    io:format("File name:~p~n", [Filename]),
    Parent=self(),
    spawn_link(?MODULE,read_info,[Filename,Index,Parent]),
    Registered=dict:store(Index, {Filename}, FileDict),
    {Index+1,Registered}.

%% @doc Reader entry point: tokenise one file and report the result to To.
%%
%% Filename is read in full and split on spaces, carriage returns and
%% newlines. Each token is stored under its own text as {Number, Position},
%% where Position counts the tokens of this file starting from 1.
%%
%% Exactly one message is sent to To:
%%   {ok, Dict}       when the file was read;
%%   {error, Reason}  when file:read_file/1 failed.
read_info(Filename,Number,To) ->
    case file:read_file(Filename) of
        {ok,Data} ->
            Tokens=string:tokens(binary_to_list(Data), " \r\n"),
            {_Next,Dict}=lists:foldl(
              fun(Word,{Pos,Acc}) ->
                  {Pos+1,dict:append(Word, {Number,Pos}, Acc)}
              end, {1,dict:new()}, Tokens),
            To ! {ok,Dict};
        %% file:read_file/1 fails with a 2-tuple. The former {_Other} pattern
        %% only matched 1-tuples, so every read error ended in case_clause.
        {error,Reason} ->
            To ! {error,Reason}
    end.

%% Collect one reply per reader and merge the per-file dicts into Dict.
%% Now is the number of readers accounted for so far, Size the number
%% that were started; the merged dict is returned once they are equal.
recinfo(Dict,Size,Size) ->
    Dict;
recinfo(Dict,Now,Size) ->
    receive
        {ok,Data} ->
            NewDict=dict:merge(fun(_Key,V1,V2)-> V1++V2 end, Dict, Data),
            recinfo(NewDict,Now+1,Size);
        {error,_Reason} ->
            %% An unreadable file contributes nothing but still counts.
            recinfo(Dict,Now+1,Size);
        {'EXIT',_Pid,normal} ->
            %% A reader that already reported and then finished.
            recinfo(Dict,Now,Size);
        {'EXIT',_Pid,_Crash} ->
            %% A reader died before reporting; count it so we do not wait
            %% forever for a reply that will never come.
            recinfo(Dict,Now+1,Size)
    end.

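 在IDE里操作一切正常 但是到erl中(win下) 发现代码中的 file:set_cwd/1没起作用 要手动在erl中 file:set_cwd/1才可以....不知道为什么(补充:一个可能的原因是Erlang字符串里反斜杠是转义符,"G:\erl_workbench\pattern"中的\e会被解析成ESC字符,路径就变了;而代码又忽略了file:set_cwd/1的返回值,所以失败时没有任何提示。路径可以写成"G:/erl_workbench/pattern"或"G:\\erl_workbench\\pattern")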

 

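API方法其实只有一个 就是read/2 那个read_info/3也导出 是因为不导出的话spawn_link/3就无法运行了(额 erlang一些地方都忘了 有人知道回答一下吗?)(补充:spawn_link/3是按模块名、函数名、参数列表来调用的,只能调用已导出的函数;改用spawn_link(fun() -> read_info(Filename,In,Self) end)就不需要导出read_info/3了)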

  • 大小: 20.7 KB
0
3
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics