Python 2.6.6 (r266:84292, Sep 15 2010, 16:22:56)
[GCC 4.4.5] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import random, cProfile
>>> from datetime import datetime
>>>
>>> CHAR_N = 10000
>>> char_map = [unichr(i) for i in range(CHAR_N)]
>>> random.shuffle(char_map)
>>>
def chargen(limit):
    """Yield ``limit`` characters taken from the module-level ``char_map``.

    NOTE(review): the loop body of this generator was lost when the
    transcript was extracted (everything between ``while i`` and the next
    prompt is missing). The body below is a reconstruction, not the
    original text: it cycles through ``char_map`` and restarts the iterator
    when it is exhausted. Confirm against the original session if available.
    """
    chars = iter(char_map)
    i = 0
    while i < limit:
        try:
            yield next(chars)
        except StopIteration:
            # Ran off the end of char_map: start over from the beginning.
            chars = iter(char_map)
            yield next(chars)
        i += 1


def func1(generated):
    """Return the distinct items of ``generated`` in first-seen order.

    List-only variant: membership is tested against the result list itself,
    so each ``not in`` check is a linear scan of everything collected so
    far. The logic is intentionally left as-is because it is one of the
    variants being profiled.

    :param generated: iterable of items comparable with ``==``.
    :returns: list of the unique items, ordered by first occurrence.
    """
    tokens = []
    for token in generated:
        if token not in tokens:
            tokens.append(token)
    return tokens
...
def func2(generated):
    """Return the distinct items of ``generated`` in first-seen order.

    Dict variant: each new item is stored with the index of its first
    occurrence, and the result is rebuilt by sorting the (item, index)
    pairs on that index.

    :param generated: iterable of hashable items.
    :returns: list of the unique items, ordered by first occurrence.
    """
    tokens = {}
    # The comprehension is evaluated only for its side effect on ``tokens``;
    # the list of ``None`` values it builds is discarded.
    [tokens.update({value: i}) for i, value in enumerate(generated)
     if value not in tokens]
    token_list = [(k, v) for k, v in tokens.items()]
    # Sort on the recorded first-occurrence index, then drop the index.
    return [i[0] for i in sorted(token_list, key=lambda x: x[1])]
...
def func3(generated):
    """Return the distinct items of ``generated`` in first-seen order.

    Dict variant that numbers new items with a private counter generator
    instead of ``enumerate``; the result is rebuilt by sorting the
    (item, number) pairs on that number.

    :param generated: iterable of hashable items.
    :returns: list of the unique items, ordered by first occurrence.
    """
    def cnt():
        # Endless counter: yields 1, 2, 3, ...
        i = 0
        while True:
            i += 1
            yield i

    c = cnt()
    tokens = {}
    # Evaluated only for its side effect on ``tokens``. The counter is
    # advanced only for items that pass the ``not in`` filter. Builtin
    # next() (Python 2.6+) is used instead of the Python-2-only
    # generator method ``.next()``.
    [tokens.update({value: next(c)}) for value in generated
     if value not in tokens]
    token_list = [(k, v) for k, v in tokens.items()]
    # Sort on the assigned sequence number, then drop the number.
    return [i[0] for i in sorted(token_list, key=lambda x: x[1])]
...
def func4(generated):
    """Return the distinct items of ``generated`` in first-seen order.

    Set variant: a set tracks what has already been emitted, and the result
    list is built in a single pass.

    :param generated: iterable of hashable items.
    :returns: list of the unique items, ordered by first occurrence.
    """
    mmap = set()
    # ``set.add`` returns None, so ``mmap.add(i) or i`` records the item and
    # then evaluates to the item itself (also for falsy items such as 0).
    return [mmap.add(i) or i for i in generated if i not in mmap]
...
def func5(generated):
    """Return the distinct items of ``generated`` in first-seen order.

    Dict variant of the single-pass approach: a dict keyed by item tracks
    what has already been emitted.

    :param generated: iterable of hashable items.
    :returns: list of the unique items, ordered by first occurrence.
    """
    mmap = {}
    # ``dict.update`` returns None, so ``mmap.update(...) or i`` records the
    # item and then evaluates to the item itself (also for falsy items).
    return [mmap.update({i: True}) or i for i in generated if i not in mmap]
...
>>> generated = [i for i in chargen(100000)]
>>>
>>> cProfile.run('func1(generated)')
10003 function calls in 113.244 CPU seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 113.215 113.215 113.243 113.243 <stdin>:1(func1)
1 0.001 0.001 113.244 113.244 <string>:1(<module>)
10000 0.028 0.000 0.028 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
>>> cProfile.run('func2(generated)')
20005 function calls in 0.418 CPU seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.187 0.187 0.409 0.409 <stdin>:1(func2)
10000 0.033 0.000 0.033 0.000 <stdin>:6(<lambda>)
1 0.009 0.009 0.418 0.418 <string>:1(<module>)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
1 0.054 0.054 0.054 0.054 {method 'items' of 'dict' objects}
10000 0.033 0.000 0.033 0.000 {method 'update' of 'dict' objects}
1 0.101 0.101 0.134 0.134 {sorted}
>>> cProfile.run('func3(generated)')
30006 function calls in 0.345 CPU seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.188 0.188 0.339 0.339 <stdin>:1(func3)
10000 0.019 0.000 0.019 0.000 <stdin>:12(<lambda>)
10001 0.032 0.000 0.032 0.000 <stdin>:2(cnt)
1 0.006 0.006 0.345 0.345 <string>:1(<module>)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
1 0.010 0.010 0.010 0.010 {method 'items' of 'dict' objects}
10000 0.045 0.000 0.045 0.000 {method 'update' of 'dict' objects}
1 0.046 0.046 0.065 0.065 {sorted}
>>> cProfile.run('func4(generated)')
10003 function calls in 0.135 CPU seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.111 0.111 0.132 0.132 <stdin>:1(func4)
1 0.003 0.003 0.135 0.135 <string>:1(<module>)
10000 0.020 0.000 0.020 0.000 {method 'add' of 'set' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
>>> cProfile.run('func5(generated)')
10003 function calls in 0.196 CPU seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.124 0.124 0.195 0.195 <stdin>:1(func5)
1 0.002 0.002 0.196 0.196 <string>:1(<module>)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
10000 0.070 0.000 0.070 0.000 {method 'update' of 'dict' objects}
>>>