Erlangsthenes筛在Erlang
我正在学习Erlang。作为练习,我学习了生成质数的Eratosthenes筛分算法。这是我的代码:
-module(seed2).
-export([get/1]).
get(N) -> WorkList = lists:duplicate(N, empty),
get(2, N, WorkList, []).
get(thats_the_end, _N, _WorkList, ResultList) -> lists:reverse(ResultList);
get(CurrentPrime, N, WorkList, ResultList) -> ModWorkList = markAsPrime(CurrentPrime, N, WorkList),
NextPrime = findNextPrime(CurrentPrime + 1, N, WorkList),
get(NextPrime, N, ModWorkList, [CurrentPrime|ResultList]).
markAsPrime(CurrentPrime, N, WorkList) when CurrentPrime =< N -> WorkListMod = replace(CurrentPrime, WorkList, prime),
markAllMultiples(CurrentPrime, N, 2*CurrentPrime, WorkListMod).
markAllMultiples(_ThePrime, N, TheCurentMark, WorkList) when TheCurentMark > N -> WorkList;
markAllMultiples(ThePrime, N, TheCurrentMark, WorkList) -> WorkListMod = replace(TheCurrentMark, WorkList, marked),
markAllMultiples(ThePrime, N, TheCurrentMark + ThePrime, WorkListMod).
findNextPrime(Iterator, N, _WorkList) when Iterator > N -> thats_the_end;
findNextPrime(Iterator, N, WorkList) -> I = lists:nth(Iterator, WorkList),
if
I =:= empty -> Iterator;
true -> findNextPrime(Iterator + 1, N, WorkList)
end.
replace(N, L, New)-> {L1, [_H|L2]} = lists:split(N - 1, L),
lists:append(L1, [New|L2]).
这段代码实际上是有效的:)。问题是我感觉这不是最好的实现。
我的问题是实施" Eratosthenes筛"的"古怪"方式是什么?
编辑:好的,Andreas解决方案很好,但是很慢。有什么想法可以改善这一点吗?
解决方案
我通过使用并发处理解决了这个问题。
来源
这是一个简单(但并非很快)的筛子实现:
-module(primes).
-export([sieve/1]).
-include_lib("eunit/include/eunit.hrl").
sieve([]) ->
[];
sieve([H|T]) ->
List = lists:filter(fun(N) -> N rem H /= 0 end, T),
[H|sieve(List)];
sieve(N) ->
sieve(lists:seq(2,N)).
到目前为止,我最快的代码(比Andrea更快)是使用array:
-module(seed4).
-export([get/1]).
get(N) -> WorkList = array:new([{size, N}, {default, empty}]),
get(2, N, WorkList, []).
get(thats_the_end, _N, _WorkList, ResultList) -> lists:reverse(ResultList);
get(CurrentPrime, N, WorkList, ResultList) -> ModWorkList = markAsPrime(CurrentPrime, N, WorkList),
NextPrime = findNextPrime(CurrentPrime + 1, N, WorkList),
get(NextPrime, N, ModWorkList, [CurrentPrime|ResultList]).
markAsPrime(CurrentPrime, N, WorkList) when CurrentPrime =< N -> WorkListMod = replace(CurrentPrime, WorkList, prime),
markAllMultiples(CurrentPrime, N, 2*CurrentPrime, WorkListMod).
markAllMultiples(_ThePrime, N, TheCurentMark, WorkList) when TheCurentMark > N -> WorkList;
markAllMultiples(ThePrime, N, TheCurrentMark, WorkList) -> WorkListMod = replace(TheCurrentMark, WorkList, marked),
markAllMultiples(ThePrime, N, TheCurrentMark + ThePrime, WorkListMod).
findNextPrime(Iterator, N, _WorkList) when Iterator > N -> thats_the_end;
findNextPrime(Iterator, N, WorkList) -> I = array:get(Iterator - 1, WorkList),
if
I =:= empty -> Iterator;
true -> findNextPrime(Iterator + 1, N, WorkList)
end.
replace(N, L, New) -> array:set(N - 1, New, L).
我没有详细研究这些内容,但是我已经在下面测试了我的实现(这是我为欧拉计划挑战而写的),并且比上述两个实现快了几个数量级。直到我取消了一些自定义功能,而是寻找列表:可以做到相同的功能,这实在是太慢了。最好学习该课程,以便始终查看是否需要执行某项操作的库实现,这通常会更快!在2.8GHz iMac上,这可以在3.6秒内计算出多达200万个质数的总和。
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Sum of all primes below Max. Will use sieve of Eratosthenes
sum_primes(Max) ->
LastCheck = round(math:sqrt(Max)),
All = lists:seq(3, Max, 2), %note are creating odd-only array
Primes = sieve(All, Max, LastCheck),
%io:format("Primes: ~p~n", [Primes]),
lists:sum(Primes) + 2. %adding back the number 2 to the list
%sieve of Eratosthenes
sieve(All, Max, LastCheck) ->
sieve([], All, Max, LastCheck).
sieve(Primes, All, Max, LastCheck) ->
%swap the first element of All onto Primes
[Cur|All2] = All,
Primes2 = [Cur|Primes],
case Cur > LastCheck of
true ->
lists:append(Primes2, All2); %all known primes and all remaining from list (not sieved) are prime
false ->
All3 = lists:filter(fun(X) -> X rem Cur =/= 0 end, All2),
sieve(Primes2, All3, Max, LastCheck)
end.
我有点喜欢这个主题,因此我开始修改BarryE的代码,并通过创建自己的list_filter函数使它快了70%,并且可以利用两个CPU。我也很容易在两个版本之间切换。测试运行显示:
61> timer:tc(test,sum_primes,[2000000]).
{2458537,142913970581}
代码:
-module(test).
%%-export([sum_primes/1]).
-compile(export_all).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%Sum of all primes below Max. Will use sieve of Eratosthenes
sum_primes(Max) ->
LastCheck = round(math:sqrt(Max)),
All = lists:seq(3, Max, 2), %note are creating odd-only array
%%Primes = sieve(noref,All, LastCheck),
Primes = spawn_sieve(All, LastCheck),
lists:sum(Primes) + 2. %adding back the number 2 to the list
%%sieve of Eratosthenes
sieve(Ref,All, LastCheck) ->
sieve(Ref,[], All, LastCheck).
sieve(noref,Primes, All = [Cur|_], LastCheck) when Cur > LastCheck ->
lists:reverse(Primes, All); %all known primes and all remaining from list (not sieved) are prime
sieve({Pid,Ref},Primes, All=[Cur|_], LastCheck) when Cur > LastCheck ->
Pid ! {Ref,lists:reverse(Primes, All)};
sieve(Ref,Primes, [Cur|All2], LastCheck) ->
%%All3 = lists:filter(fun(X) -> X rem Cur =/= 0 end, All2),
All3 = lists_filter(Cur,All2),
sieve(Ref,[Cur|Primes], All3, LastCheck).
lists_filter(Cur,All2) ->
lists_filter(Cur,All2,[]).
lists_filter(V,[H|T],L) ->
case H rem V of
0 ->
lists_filter(V,T,L);
_ ->
lists_filter(V,T,[H|L])
end;
lists_filter(_,[],L) ->
lists:reverse(L).
%% This is a sloppy implementation ;)
spawn_sieve(All,Last) ->
%% split the job
{L1,L2} = lists:split(round(length(All)/2),All),
Filters = filters(All,Last),
%%io:format("F:~p~n",[Filters]),
L3 = lists:append(Filters,L2),
%%io:format("L1:~w~n",[L1]),
%% io:format("L2:~w~n",[L3]),
%%lists_filter(Cur,All2,[]).
Pid = self(),
Ref1=make_ref(),
Ref2=make_ref(),
erlang:spawn(?MODULE,sieve,[{Pid,Ref1},L1,Last]),
erlang:spawn(?MODULE,sieve,[{Pid,Ref2},L3,Last]),
Res1=receive
{Ref1,R1} ->
{1,R1};
{Ref2,R1} ->
{2,R1}
end,
Res2= receive
{Ref1,R2} ->
{1,R2};
{Ref2,R2} ->
{2,R2}
end,
apnd(Filters,Res1,Res2).
filters([H|T],Last) when H
[H|filters(T,Last)];
filters([H|_],_) ->
[H];
filters(_,_) ->
[].
apnd(Filters,{1,N1},{2,N2}) ->
lists:append(N1,subtract(N2,Filters));
apnd(Filters,{2,N2},{1,N1}) ->
lists:append(N1,subtract(N2,Filters)).
subtract([H|L],[H|T]) ->
subtract(L,T);
subtract(L=[A|_],[B|_]) when A > B ->
L;
subtract(L,[_|T]) ->
subtract(L,T);
subtract(L,[]) ->
L.
我以前的帖子未正确格式化。这是代码的转贴。很抱歉发送垃圾邮件...
-module(test).
%%-export([sum_primes/1]).
-compile(export_all).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%Sum of all primes below Max. Will use sieve of Eratosthenes
sum_primes(Max) ->
LastCheck = round(math:sqrt(Max)),
All = lists:seq(3, Max, 2), %note are creating odd-only array
%%Primes = sieve(noref,All, LastCheck),
Primes = spawn_sieve(All, LastCheck),
lists:sum(Primes) + 2. %adding back the number 2 to the list
%%sieve of Eratosthenes
sieve(Ref,All, LastCheck) ->
sieve(Ref,[], All, LastCheck).
sieve(noref,Primes, All = [Cur|_], LastCheck) when Cur > LastCheck ->
lists:reverse(Primes, All); %all known primes and all remaining from list (not sieved) are prime
sieve({Pid,Ref},Primes, All=[Cur|_], LastCheck) when Cur > LastCheck ->
Pid ! {Ref,lists:reverse(Primes, All)};
sieve(Ref,Primes, [Cur|All2], LastCheck) ->
%%All3 = lists:filter(fun(X) -> X rem Cur =/= 0 end, All2),
All3 = lists_filter(Cur,All2),
sieve(Ref,[Cur|Primes], All3, LastCheck).
lists_filter(Cur,All2) ->
lists_filter(Cur,All2,[]).
lists_filter(V,[H|T],L) ->
case H rem V of
0 ->
lists_filter(V,T,L);
_ ->
lists_filter(V,T,[H|L])
end;
lists_filter(_,[],L) ->
lists:reverse(L).
%% This is a sloppy implementation ;)
spawn_sieve(All,Last) ->
%% split the job
{L1,L2} = lists:split(round(length(All)/2),All),
Filters = filters(All,Last),
L3 = lists:append(Filters,L2),
Pid = self(),
Ref1=make_ref(),
Ref2=make_ref(),
erlang:spawn(?MODULE,sieve,[{Pid,Ref1},L1,Last]),
erlang:spawn(?MODULE,sieve,[{Pid,Ref2},L3,Last]),
Res1=receive
{Ref1,R1} ->
{1,R1};
{Ref2,R1} ->
{2,R1}
end,
Res2= receive
{Ref1,R2} ->
{1,R2};
{Ref2,R2} ->
{2,R2}
end,
apnd(Filters,Res1,Res2).
filters([H|T],Last) when H
[H|filters(T,Last)];
filters([H|_],_) ->
[H];
filters(_,_) ->
[].
apnd(Filters,{1,N1},{2,N2}) ->
lists:append(N1,subtract(N2,Filters));
apnd(Filters,{2,N2},{1,N1}) ->
lists:append(N1,subtract(N2,Filters)).
subtract([H|L],[H|T]) ->
subtract(L,T);
subtract(L=[A|_],[B|_]) when A > B ->
L;
subtract(L,[_|T]) ->
subtract(L,T);
subtract(L,[]) ->
L.
我们可以向老板展示:http://www.sics.se/~joe/apachevsyaws.html。其他一些(经典的?)erlang参数是:
-不间断操作,可以即时加载新代码。
-易于调试,不再需要分析核心转储。
-易于利用多核/ CPU
-容易利用集群吗?
谁想处理指针和东西?这不是21世纪吗? ;)
一些陷阱:
编写某些东西可能看起来容易又快捷,但是性能却很差。如果我
想快点做,我通常最终会写2-4个相同的版本
功能。通常,我们需要采取鹰眼的态度来解决可能是一个问题的问题。
与所使用的也没有什么不同。
- 在列表中查找内容>大约1000个元素的速度很慢,请尝试使用ets表。
- 字符串" abc"占用的空间比3个字节大得多。因此,请尝试使用二进制文件(这很痛苦)。
总而言之,我认为在使用erlang编写某些内容时,始终要记住性能问题。 Erlang帅哥需要解决这个问题,我认为他们会的。
在这里可以找到4种不同的实现,以在Erlang中查找质数(其中两个是"实"筛)并获得性能测量结果:
http://caylespandon.blogspot.com/2009/01/in-euler-problem-10-we-are-asked-to.html
这是我的筛子实现,它使用列表推导并尝试进行尾递归。我在末尾反转了列表,以便对素数进行排序:
primes(Prime, Max, Primes,Integers) when Prime > Max ->
lists:reverse([Prime|Primes]) ++ Integers;
primes(Prime, Max, Primes, Integers) ->
[NewPrime|NewIntegers] = [ X || X <- Integers, X rem Prime =/= 0 ],
primes(NewPrime, Max, [Prime|Primes], NewIntegers).
primes(N) ->
primes(2, round(math:sqrt(N)), [], lists:seq(3,N,2)). % skip odds
在我的2ghz mac机上,大约需要2.8毫秒来计算最多200万的素数。
足够简单,完全实现算法,并且不使用任何库函数(仅使用模式匹配和列表理解)。
确实不是很强大。我只是试图使其尽可能简单。
-module(primes). -export([primes/1, primes/2]). primes(X) -> sieve(range(2, X)). primes(X, Y) -> remove(primes(X), primes(Y)). range(X, X) -> [X]; range(X, Y) -> [X | range(X + 1, Y)]. sieve([X]) -> [X]; sieve([H | T]) -> [H | sieve(remove([H * X || X <-[H | T]], T))]. remove(_, []) -> []; remove([H | X], [H | Y]) -> remove(X, Y); remove(X, [H | Y]) -> [H | remove(X, Y)].

