Python 2.6.6 (r266:84292, Sep 15 2010, 16:22:56)
[GCC 4.4.5] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import random, cProfile
>>> from datetime import datetime
>>>
>>> CHAR_N = 10000  # size of the alphabet, i.e. how many distinct characters exist
>>> char_map = [unichr(i) for i in range(CHAR_N)]  # unicode characters for code points 0 .. CHAR_N-1
>>> random.shuffle(char_map)  # in-place shuffle, so the order is not code-point order
>>>
>>> def chargen(limit):
...     chars = iter(char_map)
...     i = 0
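...     while i < limit:
...         # [transcript damage: everything from "<" up to the next prompt was stripped as if
...         #  it were an HTML tag; "< limit:" is inferred from the parameter name, and the rest
...         #  of chargen's loop body cannot be recovered from this copy]
...
>>> def func1(generated):  # order-preserving dedup using a plain list for the membership test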
...     tokens = []
...     for token in generated:
...         if token not in tokens:
...             tokens.append(token)
...     return tokens
...
>>> def func2(generated):  # order-preserving dedup: dict maps each value to the index of its first occurrence
...     tokens = {}
...     [tokens.update({value: i}) for i, value in enumerate(generated) \
...          if value not in tokens]  # comprehension is run only for its side effect on tokens
...     token_list = [(k,v) for k,v in tokens.items()]  # (value, first index) pairs
...     return [i[0] for i in sorted(token_list, key=lambda x: x[1])]  # values sorted by first index
...
>>> def func3(generated):  # order-preserving dedup: dict maps each value to a rank drawn from a counter
...     def cnt():  # endless generator yielding 1, 2, 3, ...
...         i = 0
...         while True:
...             i += 1
...             yield i
...     c = cnt()
...     tokens = {}
...     [tokens.update({value: c.next()}) for value in generated \
...          if value not in tokens]  # c.next() is the Python 2 generator API (Python 3: next(c))
...     token_list = [(k,v) for k,v in tokens.items()]  # (value, rank) pairs
...     return [i[0] for i in sorted(token_list, key=lambda x: x[1])]  # values sorted by rank = first-seen order
...
>>> def func4(generated):  # order-preserving dedup using a set of already-seen items
...     mmap = set()  # items emitted so far
...     return [mmap.add(i) or i for i in generated if i not in mmap]  # add() returns None, so `or i` yields i
...
>>> def func5(generated):  # order-preserving dedup using dict keys as the seen-set
...     mmap = {}  # keys are the items emitted so far; the True values are never read
...     return [mmap.update({i: True}) or i for i in generated if i not in mmap]  # update() returns None, so `or i` yields i
...
>>> generated = [i for i in chargen(100000)]  # materialise the generator once; every profiled call receives this same list
>>>
>>> cProfile.run('func1(generated)')  # list membership test: by far the slowest variant in this session
         10003 function calls in 113.244 CPU seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1  113.215  113.215  113.243  113.243 :1(func1)
        1    0.001    0.001  113.244  113.244 :1()
    10000    0.028    0.000    0.028    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


>>> cProfile.run('func2(generated)')  # dict + enumerate index, then a sort to recover first-seen order
         20005 function calls in 0.418 CPU seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.187    0.187    0.409    0.409 :1(func2)
    10000    0.033    0.000    0.033    0.000 :6()
        1    0.009    0.009    0.418    0.418 :1()
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.054    0.054    0.054    0.054 {method 'items' of 'dict' objects}
    10000    0.033    0.000    0.033    0.000 {method 'update' of 'dict' objects}
        1    0.101    0.101    0.134    0.134 {sorted}


>>> cProfile.run('func3(generated)')  # dict + generator counter, then a sort to recover first-seen order
         30006 function calls in 0.345 CPU seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.188    0.188    0.339    0.339 :1(func3)
    10000    0.019    0.000    0.019    0.000 :12()
    10001    0.032    0.000    0.032    0.000 :2(cnt)
        1    0.006    0.006    0.345    0.345 :1()
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.010    0.010    0.010    0.010 {method 'items' of 'dict' objects}
    10000    0.045    0.000    0.045    0.000 {method 'update' of 'dict' objects}
        1    0.046    0.046    0.065    0.065 {sorted}


>>> cProfile.run('func4(generated)')  # set-based single pass: lowest total time of the five runs
         10003 function calls in 0.135 CPU seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.111    0.111    0.132    0.132 :1(func4)
        1    0.003    0.003    0.135    0.135 :1()
    10000    0.020    0.000    0.020    0.000 {method 'add' of 'set' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


>>> cProfile.run('func5(generated)')  # dict-based single pass, no sort needed
         10003 function calls in 0.196 CPU seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.124    0.124    0.195    0.195 :1(func5)
        1    0.002    0.002    0.196    0.196 :1()
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
    10000    0.070    0.000    0.070    0.000 {method 'update' of 'dict' objects}


>>>