Erlangsthenes筛在Erlang
我正在学习Erlang。作为练习,我学习了生成质数的Eratosthenes筛分算法。这是我的代码:
-module(seed2). -export([get/1]). get(N) -> WorkList = lists:duplicate(N, empty), get(2, N, WorkList, []). get(thats_the_end, _N, _WorkList, ResultList) -> lists:reverse(ResultList); get(CurrentPrime, N, WorkList, ResultList) -> ModWorkList = markAsPrime(CurrentPrime, N, WorkList), NextPrime = findNextPrime(CurrentPrime + 1, N, WorkList), get(NextPrime, N, ModWorkList, [CurrentPrime|ResultList]). markAsPrime(CurrentPrime, N, WorkList) when CurrentPrime =< N -> WorkListMod = replace(CurrentPrime, WorkList, prime), markAllMultiples(CurrentPrime, N, 2*CurrentPrime, WorkListMod). markAllMultiples(_ThePrime, N, TheCurentMark, WorkList) when TheCurentMark > N -> WorkList; markAllMultiples(ThePrime, N, TheCurrentMark, WorkList) -> WorkListMod = replace(TheCurrentMark, WorkList, marked), markAllMultiples(ThePrime, N, TheCurrentMark + ThePrime, WorkListMod). findNextPrime(Iterator, N, _WorkList) when Iterator > N -> thats_the_end; findNextPrime(Iterator, N, WorkList) -> I = lists:nth(Iterator, WorkList), if I =:= empty -> Iterator; true -> findNextPrime(Iterator + 1, N, WorkList) end. replace(N, L, New)-> {L1, [_H|L2]} = lists:split(N - 1, L), lists:append(L1, [New|L2]).
这段代码实际上是有效的:)。问题是我感觉这不是最好的实现。
我的问题是实施" Eratosthenes筛"的"古怪"方式是什么?
编辑:好的,Andreas解决方案很好,但是很慢。有什么想法可以改善这一点吗?
解决方案
我通过使用并发处理解决了这个问题。
来源
这是一个简单(但并非很快)的筛子实现:
-module(primes). -export([sieve/1]). -include_lib("eunit/include/eunit.hrl"). sieve([]) -> []; sieve([H|T]) -> List = lists:filter(fun(N) -> N rem H /= 0 end, T), [H|sieve(List)]; sieve(N) -> sieve(lists:seq(2,N)).
到目前为止,我最快的代码(比Andrea更快)是使用array:
-module(seed4). -export([get/1]). get(N) -> WorkList = array:new([{size, N}, {default, empty}]), get(2, N, WorkList, []). get(thats_the_end, _N, _WorkList, ResultList) -> lists:reverse(ResultList); get(CurrentPrime, N, WorkList, ResultList) -> ModWorkList = markAsPrime(CurrentPrime, N, WorkList), NextPrime = findNextPrime(CurrentPrime + 1, N, WorkList), get(NextPrime, N, ModWorkList, [CurrentPrime|ResultList]). markAsPrime(CurrentPrime, N, WorkList) when CurrentPrime =< N -> WorkListMod = replace(CurrentPrime, WorkList, prime), markAllMultiples(CurrentPrime, N, 2*CurrentPrime, WorkListMod). markAllMultiples(_ThePrime, N, TheCurentMark, WorkList) when TheCurentMark > N -> WorkList; markAllMultiples(ThePrime, N, TheCurrentMark, WorkList) -> WorkListMod = replace(TheCurrentMark, WorkList, marked), markAllMultiples(ThePrime, N, TheCurrentMark + ThePrime, WorkListMod). findNextPrime(Iterator, N, _WorkList) when Iterator > N -> thats_the_end; findNextPrime(Iterator, N, WorkList) -> I = array:get(Iterator - 1, WorkList), if I =:= empty -> Iterator; true -> findNextPrime(Iterator + 1, N, WorkList) end. replace(N, L, New) -> array:set(N - 1, New, L).
我没有详细研究这些内容,但是我已经在下面测试了我的实现(这是我为欧拉计划挑战而写的),并且比上述两个实现快了几个数量级。直到我取消了一些自定义功能,而是寻找列表:可以做到相同的功能,这实在是太慢了。最好学习该课程,以便始终查看是否需要执行某项操作的库实现,这通常会更快!在2.8GHz iMac上,这可以在3.6秒内计算出多达200万个质数的总和。
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %Sum of all primes below Max. Will use sieve of Eratosthenes sum_primes(Max) -> LastCheck = round(math:sqrt(Max)), All = lists:seq(3, Max, 2), %note are creating odd-only array Primes = sieve(All, Max, LastCheck), %io:format("Primes: ~p~n", [Primes]), lists:sum(Primes) + 2. %adding back the number 2 to the list %sieve of Eratosthenes sieve(All, Max, LastCheck) -> sieve([], All, Max, LastCheck). sieve(Primes, All, Max, LastCheck) -> %swap the first element of All onto Primes [Cur|All2] = All, Primes2 = [Cur|Primes], case Cur > LastCheck of true -> lists:append(Primes2, All2); %all known primes and all remaining from list (not sieved) are prime false -> All3 = lists:filter(fun(X) -> X rem Cur =/= 0 end, All2), sieve(Primes2, All3, Max, LastCheck) end.
我有点喜欢这个主题,因此我开始修改BarryE的代码,并通过创建自己的list_filter函数使它快了70%,并且可以利用两个CPU。我也很容易在两个版本之间切换。测试运行显示:
61> timer:tc(test,sum_primes,[2000000]). {2458537,142913970581}
代码:
-module(test). %%-export([sum_primes/1]). -compile(export_all). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%Sum of all primes below Max. Will use sieve of Eratosthenes sum_primes(Max) -> LastCheck = round(math:sqrt(Max)), All = lists:seq(3, Max, 2), %note are creating odd-only array %%Primes = sieve(noref,All, LastCheck), Primes = spawn_sieve(All, LastCheck), lists:sum(Primes) + 2. %adding back the number 2 to the list %%sieve of Eratosthenes sieve(Ref,All, LastCheck) -> sieve(Ref,[], All, LastCheck). sieve(noref,Primes, All = [Cur|_], LastCheck) when Cur > LastCheck -> lists:reverse(Primes, All); %all known primes and all remaining from list (not sieved) are prime sieve({Pid,Ref},Primes, All=[Cur|_], LastCheck) when Cur > LastCheck -> Pid ! {Ref,lists:reverse(Primes, All)}; sieve(Ref,Primes, [Cur|All2], LastCheck) -> %%All3 = lists:filter(fun(X) -> X rem Cur =/= 0 end, All2), All3 = lists_filter(Cur,All2), sieve(Ref,[Cur|Primes], All3, LastCheck). lists_filter(Cur,All2) -> lists_filter(Cur,All2,[]). lists_filter(V,[H|T],L) -> case H rem V of 0 -> lists_filter(V,T,L); _ -> lists_filter(V,T,[H|L]) end; lists_filter(_,[],L) -> lists:reverse(L). %% This is a sloppy implementation ;) spawn_sieve(All,Last) -> %% split the job {L1,L2} = lists:split(round(length(All)/2),All), Filters = filters(All,Last), %%io:format("F:~p~n",[Filters]), L3 = lists:append(Filters,L2), %%io:format("L1:~w~n",[L1]), %% io:format("L2:~w~n",[L3]), %%lists_filter(Cur,All2,[]). Pid = self(), Ref1=make_ref(), Ref2=make_ref(), erlang:spawn(?MODULE,sieve,[{Pid,Ref1},L1,Last]), erlang:spawn(?MODULE,sieve,[{Pid,Ref2},L3,Last]), Res1=receive {Ref1,R1} -> {1,R1}; {Ref2,R1} -> {2,R1} end, Res2= receive {Ref1,R2} -> {1,R2}; {Ref2,R2} -> {2,R2} end, apnd(Filters,Res1,Res2). filters([H|T],Last) when H [H|filters(T,Last)]; filters([H|_],_) -> [H]; filters(_,_) -> []. apnd(Filters,{1,N1},{2,N2}) -> lists:append(N1,subtract(N2,Filters)); apnd(Filters,{2,N2},{1,N1}) -> lists:append(N1,subtract(N2,Filters)). subtract([H|L],[H|T]) -> subtract(L,T); subtract(L=[A|_],[B|_]) when A > B -> L; subtract(L,[_|T]) -> subtract(L,T); subtract(L,[]) -> L.
我以前的帖子未正确格式化。这是代码的转贴。很抱歉发送垃圾邮件...
-module(test). %%-export([sum_primes/1]). -compile(export_all). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%Sum of all primes below Max. Will use sieve of Eratosthenes sum_primes(Max) -> LastCheck = round(math:sqrt(Max)), All = lists:seq(3, Max, 2), %note are creating odd-only array %%Primes = sieve(noref,All, LastCheck), Primes = spawn_sieve(All, LastCheck), lists:sum(Primes) + 2. %adding back the number 2 to the list %%sieve of Eratosthenes sieve(Ref,All, LastCheck) -> sieve(Ref,[], All, LastCheck). sieve(noref,Primes, All = [Cur|_], LastCheck) when Cur > LastCheck -> lists:reverse(Primes, All); %all known primes and all remaining from list (not sieved) are prime sieve({Pid,Ref},Primes, All=[Cur|_], LastCheck) when Cur > LastCheck -> Pid ! {Ref,lists:reverse(Primes, All)}; sieve(Ref,Primes, [Cur|All2], LastCheck) -> %%All3 = lists:filter(fun(X) -> X rem Cur =/= 0 end, All2), All3 = lists_filter(Cur,All2), sieve(Ref,[Cur|Primes], All3, LastCheck). lists_filter(Cur,All2) -> lists_filter(Cur,All2,[]). lists_filter(V,[H|T],L) -> case H rem V of 0 -> lists_filter(V,T,L); _ -> lists_filter(V,T,[H|L]) end; lists_filter(_,[],L) -> lists:reverse(L). %% This is a sloppy implementation ;) spawn_sieve(All,Last) -> %% split the job {L1,L2} = lists:split(round(length(All)/2),All), Filters = filters(All,Last), L3 = lists:append(Filters,L2), Pid = self(), Ref1=make_ref(), Ref2=make_ref(), erlang:spawn(?MODULE,sieve,[{Pid,Ref1},L1,Last]), erlang:spawn(?MODULE,sieve,[{Pid,Ref2},L3,Last]), Res1=receive {Ref1,R1} -> {1,R1}; {Ref2,R1} -> {2,R1} end, Res2= receive {Ref1,R2} -> {1,R2}; {Ref2,R2} -> {2,R2} end, apnd(Filters,Res1,Res2). filters([H|T],Last) when H [H|filters(T,Last)]; filters([H|_],_) -> [H]; filters(_,_) -> []. apnd(Filters,{1,N1},{2,N2}) -> lists:append(N1,subtract(N2,Filters)); apnd(Filters,{2,N2},{1,N1}) -> lists:append(N1,subtract(N2,Filters)). subtract([H|L],[H|T]) -> subtract(L,T); subtract(L=[A|_],[B|_]) when A > B -> L; subtract(L,[_|T]) -> subtract(L,T); subtract(L,[]) -> L.
我们可以向老板展示:http://www.sics.se/~joe/apachevsyaws.html。其他一些(经典的?)erlang参数是:
-不间断操作,可以即时加载新代码。
-易于调试,不再需要分析核心转储。
-易于利用多核/ CPU
-容易利用集群吗?
谁想处理指针和东西?这不是21世纪吗? ;)
一些陷阱:
编写某些东西可能看起来容易又快捷,但是性能却很差。如果我
想快点做,我通常最终会写2-4个相同的版本
功能。通常,我们需要采取鹰眼的态度来解决可能是一个问题的问题。
与所使用的也没有什么不同。
- 在列表中查找内容>大约1000个元素的速度很慢,请尝试使用ets表。
- 字符串" abc"占用的空间比3个字节大得多。因此,请尝试使用二进制文件(这很痛苦)。
总而言之,我认为在使用erlang编写某些内容时,始终要记住性能问题。 Erlang帅哥需要解决这个问题,我认为他们会的。
在这里可以找到4种不同的实现,以在Erlang中查找质数(其中两个是"实"筛)并获得性能测量结果:
http://caylespandon.blogspot.com/2009/01/in-euler-problem-10-we-are-asked-to.html
这是我的筛子实现,它使用列表推导并尝试进行尾递归。我在末尾反转了列表,以便对素数进行排序:
primes(Prime, Max, Primes,Integers) when Prime > Max -> lists:reverse([Prime|Primes]) ++ Integers; primes(Prime, Max, Primes, Integers) -> [NewPrime|NewIntegers] = [ X || X <- Integers, X rem Prime =/= 0 ], primes(NewPrime, Max, [Prime|Primes], NewIntegers). primes(N) -> primes(2, round(math:sqrt(N)), [], lists:seq(3,N,2)). % skip odds
在我的2ghz mac机上,大约需要2.8毫秒来计算最多200万的素数。
足够简单,完全实现算法,并且不使用任何库函数(仅使用模式匹配和列表理解)。
确实不是很强大。我只是试图使其尽可能简单。
-module(primes). -export([primes/1, primes/2]). primes(X) -> sieve(range(2, X)). primes(X, Y) -> remove(primes(X), primes(Y)). range(X, X) -> [X]; range(X, Y) -> [X | range(X + 1, Y)]. sieve([X]) -> [X]; sieve([H | T]) -> [H | sieve(remove([H * X || X <-[H | T]], T))]. remove(_, []) -> []; remove([H | X], [H | Y]) -> remove(X, Y); remove(X, [H | Y]) -> [H | remove(X, Y)].