Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Small style fixes #1650

Merged
merged 5 commits into from
Oct 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 89 additions & 121 deletions docs/notebooks/annoytutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,21 +41,21 @@
"name": "stdout",
"output_type": "stream",
"text": [
"CPython 3.6.0\n",
"IPython 6.0.0\n",
"CPython 3.5.3\n",
"IPython 6.2.1\n",
"\n",
"gensim 2.1.0\n",
"numpy 1.12.1\n",
"scipy 0.19.0\n",
"psutil 5.2.2\n",
"matplotlib 2.0.0\n",
"gensim 3.0.1\n",
"numpy 1.13.3\n",
"scipy 1.0.0\n",
"psutil 5.4.0\n",
"matplotlib 2.1.0\n",
"\n",
"compiler : GCC 4.4.7 20120313 (Red Hat 4.4.7-1)\n",
"compiler : GCC 6.3.0 20170406\n",
"system : Linux\n",
"release : 4.9.27-moby\n",
"release : 4.10.0-37-generic\n",
"machine : x86_64\n",
"processor : x86_64\n",
"CPU cores : 4\n",
"CPU cores : 8\n",
"interpreter: 64bit\n"
]
}
Expand All @@ -76,9 +76,7 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import os.path\n",
Expand All @@ -98,9 +96,7 @@
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"LOGS = False\n",
Expand Down Expand Up @@ -136,28 +132,18 @@
"from gensim.models import Word2Vec, KeyedVectors\n",
"from gensim.models.word2vec import Text8Corpus\n",
"\n",
"# using params from Word2Vec_FastText_Comparison\n",
"\n",
"lr = 0.05\n",
"dim = 100\n",
"ws = 5\n",
"epoch = 5\n",
"minCount = 5\n",
"neg = 5\n",
"loss = 'ns'\n",
"t = 1e-4\n",
"# Using params from Word2Vec_FastText_Comparison\n",
"\n",
"# Same values as used for fastText training above\n",
"params = {\n",
" 'alpha': lr,\n",
" 'size': dim,\n",
" 'window': ws,\n",
" 'iter': epoch,\n",
" 'min_count': minCount,\n",
" 'sample': t,\n",
" 'alpha': 0.05,\n",
" 'size': 100,\n",
" 'window': 5,\n",
" 'iter': 5,\n",
" 'min_count': 5,\n",
" 'sample': 1e-4,\n",
" 'sg': 1,\n",
" 'hs': 0,\n",
" 'negative': neg\n",
" 'negative': 5\n",
"}\n",
"\n",
"model = Word2Vec(Text8Corpus('text8'), **params)\n",
Expand All @@ -181,16 +167,11 @@
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"#Set up the model and vector that we are using in the comparison\n",
"try:\n",
" from gensim.similarities.index import AnnoyIndexer\n",
"except ImportError:\n",
" raise ValueError(\"SKIP: Please install the annoy indexer\")\n",
"# Set up the model and vector that we are using in the comparison\n",
"from gensim.similarities.index import AnnoyIndexer\n",
"\n",
"model.init_sims()\n",
"annoy_index = AnnoyIndexer(model, 100)"
Expand All @@ -204,11 +185,11 @@
{
"data": {
"text/plain": [
"[('the', 1.0000001192092896),\n",
" ('of', 0.8333191275596619),\n",
" ('in', 0.8258030414581299),\n",
" ('a', 0.7722446918487549),\n",
" ('and', 0.7408151626586914)]"
"[('the', 0.9999999403953552),\n",
" ('of', 0.8254586458206177),\n",
" ('in', 0.8207480907440186),\n",
" ('a', 0.7935141324996948),\n",
" ('and', 0.7539303302764893)]"
]
},
"execution_count": 6,
Expand All @@ -226,9 +207,7 @@
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import time\n",
Expand All @@ -238,9 +217,7 @@
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"def avg_query_time(annoy_index=None, queries=1000):\n",
Expand All @@ -266,10 +243,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Gensim (s/query):\t0.01409\n",
"Annoy (s/query):\t0.00031\n",
"Gensim (s/query):\t0.02066\n",
"Annoy (s/query):\t0.00038\n",
"\n",
"Annoy is 44.94 times faster on average on this particular run\n"
"Annoy is 54.59 times faster on average on this particular run\n"
]
}
],
Expand Down Expand Up @@ -329,30 +306,30 @@
"output_type": "stream",
"text": [
"Approximate Neighbors\n",
"('science', 0.9998273665114539)\n",
"('multidisciplinary', 0.6123671233654022)\n",
"('sciences', 0.6045806407928467)\n",
"('astrobiology', 0.5991603136062622)\n",
"('aaas', 0.5971885621547699)\n",
"('bimonthly', 0.5882039070129395)\n",
"('interdisciplinary', 0.5875678360462189)\n",
"('psychohistory', 0.5828642845153809)\n",
"('protoscience', 0.5820913016796112)\n",
"('scientific', 0.5779787003993988)\n",
"('transhumanism', 0.5754979848861694)\n",
"('science', 1.0)\n",
"('multidisciplinary', 0.6066591441631317)\n",
"('astrobiology', 0.5995452105998993)\n",
"('actuarial', 0.5984143614768982)\n",
"('robotics', 0.5919757187366486)\n",
"('sciences', 0.5884003043174744)\n",
"('scientific', 0.5805909633636475)\n",
"('interdisciplinary', 0.5763890445232391)\n",
"('astronautics', 0.5748652517795563)\n",
"('psychohistory', 0.5744689702987671)\n",
"('aaas', 0.574154257774353)\n",
"\n",
"Normal (not Annoy-indexed) Neighbors\n",
"('science', 0.9999998807907104)\n",
"('fiction', 0.7650254964828491)\n",
"('multidisciplinary', 0.6994814872741699)\n",
"('sciences', 0.6872870922088623)\n",
"('astrobiology', 0.6786551475524902)\n",
"('aaas', 0.6754858493804932)\n",
"('technology', 0.6748392581939697)\n",
"('bimonthly', 0.6608479619026184)\n",
"('interdisciplinary', 0.6597993969917297)\n",
"('astronautics', 0.6552520990371704)\n",
"('psychohistory', 0.6519955396652222)\n"
"('science', 1.0)\n",
"('fiction', 0.7570418119430542)\n",
"('multidisciplinary', 0.6905661225318909)\n",
"('astrobiology', 0.6792721152305603)\n",
"('actuarial', 0.6774581670761108)\n",
"('robotics', 0.6670321822166443)\n",
"('vinge', 0.6633784770965576)\n",
"('sciences', 0.6611713767051697)\n",
"('vernor', 0.6521490812301636)\n",
"('popularizer', 0.6499912738800049)\n",
"('scientific', 0.648192286491394)\n"
]
}
],
Expand Down Expand Up @@ -406,9 +383,7 @@
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"fname = '/tmp/mymodel.index'\n",
Expand All @@ -432,17 +407,17 @@
"name": "stdout",
"output_type": "stream",
"text": [
"('science', 0.9998273665114539)\n",
"('multidisciplinary', 0.6123671233654022)\n",
"('sciences', 0.6045806407928467)\n",
"('astrobiology', 0.5991603136062622)\n",
"('aaas', 0.5971885621547699)\n",
"('bimonthly', 0.5882039070129395)\n",
"('interdisciplinary', 0.5875678360462189)\n",
"('psychohistory', 0.5828642845153809)\n",
"('protoscience', 0.5820913016796112)\n",
"('scientific', 0.5779787003993988)\n",
"('transhumanism', 0.5754979848861694)\n"
"('science', 1.0)\n",
"('multidisciplinary', 0.6066591441631317)\n",
"('astrobiology', 0.5995452105998993)\n",
"('actuarial', 0.5984143614768982)\n",
"('robotics', 0.5919757187366486)\n",
"('sciences', 0.5884003043174744)\n",
"('scientific', 0.5805909633636475)\n",
"('interdisciplinary', 0.5763890445232391)\n",
"('astronautics', 0.5748652517795563)\n",
"('psychohistory', 0.5744689702987671)\n",
"('aaas', 0.574154257774353)\n"
]
}
],
Expand Down Expand Up @@ -477,9 +452,7 @@
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"# Remove verbosity from code below (if logging active)\n",
Expand All @@ -491,9 +464,7 @@
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"from multiprocessing import Process\n",
Expand All @@ -517,16 +488,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Process Id: 311\n",
"Process Id: 18708\n",
"\n",
"Memory used by process 311: pmem(rss=534646784, vms=1907343360, shared=12107776, text=4096, lib=0, data=563171328, dirty=0) \n",
"Memory used by process 18708: pmem(rss=544612352, vms=2047995904, shared=10641408, text=4120576, lib=0, data=823377920, dirty=0)\n",
"---\n",
"Process Id: 320\n",
"Process Id: 18715\n",
"\n",
"Memory used by process 320: pmem(rss=534663168, vms=1907343360, shared=12107776, text=4096, lib=0, data=563204096, dirty=0) \n",
"Memory used by process 18715: pmem(rss=544624640, vms=2047995904, shared=10641408, text=4120576, lib=0, data=823386112, dirty=0)\n",
"---\n",
"CPU times: user 540 ms, sys: 180 ms, total: 720 ms\n",
"Wall time: 24.5 s\n"
"CPU times: user 464 ms, sys: 68 ms, total: 532 ms\n",
"Wall time: 45.3 s\n"
]
}
],
Expand Down Expand Up @@ -569,16 +540,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Process Id: 329\n",
"Process Id: 18733\n",
"\n",
"Memory used by process 329: pmem(rss=514174976, vms=1885904896, shared=142942208, text=4096, lib=0, data=411869184, dirty=0) \n",
"Memory used by process 18733: pmem(rss=525369344, vms=2028597248, shared=140480512, text=4120576, lib=0, data=674148352, dirty=0)\n",
"---\n",
"Process Id: 338\n",
"Process Id: 18740\n",
"\n",
"Memory used by process 338: pmem(rss=514174976, vms=1885904896, shared=142942208, text=4096, lib=0, data=411869184, dirty=0) \n",
"Memory used by process 18740: pmem(rss=525365248, vms=2028597248, shared=140480512, text=4120576, lib=0, data=674148352, dirty=0)\n",
"---\n",
"CPU times: user 490 ms, sys: 210 ms, total: 700 ms\n",
"Wall time: 2.62 s\n"
"CPU times: user 444 ms, sys: 96 ms, total: 540 ms\n",
"Wall time: 2.06 s\n"
]
}
],
Expand Down Expand Up @@ -617,9 +588,7 @@
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
Expand Down Expand Up @@ -719,9 +688,7 @@
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"# To export our model as text\n",
Expand All @@ -737,18 +704,19 @@
"name": "stdout",
"output_type": "stream",
"text": [
"71290 100\n",
"the 0.405333 0.074649 0.154192 0.091247 -0.036666 -0.079057 0.056531 0.012814 0.046281 0.056158 0.209166 -0.046209 0.252618 0.022687 0.239388 -0.122108 -0.028497 -0.098760 -0.334427 0.029130 0.117470 -0.237462 0.064778 -0.053481 -0.165359 0.223160 0.104593 0.144142 0.115136 0.142812 0.201899 0.171716 0.256478 0.142440 -0.150566 -0.175130 0.144592 0.156056 -0.181402 0.103827 -0.173085 0.053641 -0.085016 0.367614 -0.225947 0.033068 0.079073 0.134803 -0.303063 -0.104457 0.079638 -0.135635 -0.072654 0.001361 0.187478 -0.221080 -0.111177 0.071005 0.091342 0.020156 -0.157671 -0.075755 0.098052 -0.065106 0.201720 -0.064369 0.080100 -0.238081 -0.078123 -0.156004 -0.053440 0.234423 -0.117426 -0.127303 0.180088 -0.004023 -0.042677 0.059902 0.453670 -0.063391 -0.049869 0.060019 0.104559 0.085386 -0.071030 -0.117753 -0.032831 0.009222 0.100854 0.082896 -0.288745 -0.015596 -0.138211 0.017519 -0.044955 -0.002358 -0.084262 -0.127057 0.155300 0.342515\n",
"of 0.302899 0.135698 0.276234 0.060655 -0.121023 -0.036229 0.251403 0.087931 0.143489 0.086507 0.171695 -0.108421 0.168884 0.031430 0.128453 -0.157933 -0.041587 -0.012564 -0.242977 -0.134526 0.098855 -0.125527 0.114153 -0.197138 -0.167243 0.415763 -0.067183 0.244922 0.044159 0.178697 0.244680 0.156735 0.322327 0.050362 -0.196953 -0.211732 0.300875 0.184376 -0.071861 -0.000714 0.028612 0.156463 0.046373 0.274268 -0.103168 -0.144895 0.079764 0.314170 -0.236254 -0.108111 0.012367 -0.053291 0.079590 -0.057262 0.221644 -0.259905 -0.120234 0.005212 0.096316 -0.044126 -0.212473 -0.228809 0.089850 -0.023453 0.316282 0.087361 0.168300 -0.239052 0.062733 -0.178071 -0.023161 0.146075 -0.150015 -0.191352 0.136295 0.082557 -0.043620 0.213094 0.413238 -0.205452 -0.115454 -0.051733 0.132394 0.093741 -0.128791 -0.159032 0.015310 -0.135258 -0.099603 -0.042002 -0.193415 -0.032718 -0.341820 0.002871 -0.069954 -0.009055 -0.073843 -0.043583 0.052326 0.348435\n"
"b'71290 100'\n",
"b'the 0.141686 0.255228 -0.191478 0.232801 0.094346 0.120224 0.075487 0.032936 0.154292 -0.063886 -0.321305 0.128102 0.072219 0.081531 -0.080868 -0.000505 -0.094688 -0.031570 -0.022748 -0.030894 0.118537 -0.091672 0.268565 0.017336 -0.158142 0.028882 -0.354505 -0.248104 0.114017 -0.132821 -0.068284 -0.311653 -0.109148 0.071787 0.391749 0.027252 -0.192908 0.323144 0.100474 -0.049426 -0.157461 -0.289598 0.148029 0.059920 -0.084889 -0.012278 0.041439 0.109375 -0.123536 -0.001224 0.112495 -0.138175 0.114445 -0.208958 0.253858 -0.033594 0.145608 0.295680 -0.008925 0.032524 0.192903 0.035965 0.135603 -0.103187 0.162365 0.031851 0.017547 -0.106019 0.094497 0.071965 0.068053 0.024725 -0.003645 0.001062 0.078102 -0.172048 0.093869 -0.035663 -0.166211 0.176462 0.049964 -0.114905 0.024031 -0.058539 -0.117258 -0.351215 -0.025666 -0.211885 0.036296 -0.326675 -0.182654 -0.019680 -0.189521 -0.206698 -0.100391 0.120583 0.076890 -0.010218 0.084345 -0.277560'\n",
"b'of 0.042654 0.329115 -0.062874 0.331052 0.041591 0.141496 0.023409 0.054587 0.003090 0.059803 -0.190404 0.169919 -0.001547 -0.005588 0.060066 0.089611 -0.072265 -0.230048 -0.028314 -0.115761 0.126566 -0.054547 0.366766 0.045456 0.011724 0.010946 -0.237676 -0.323509 0.232554 -0.039293 -0.049269 -0.085853 -0.215061 0.130000 0.347488 0.165928 -0.169574 0.305217 -0.017916 0.034427 -0.133006 -0.144247 0.150204 0.120708 0.053237 -0.183496 0.053565 0.030120 -0.115428 0.030555 0.115227 -0.206632 -0.043280 -0.194560 0.220410 -0.107236 -0.003629 0.253298 0.048558 -0.040416 0.225557 0.091650 0.052787 -0.052910 0.101683 0.113876 -0.105539 -0.056264 0.159010 0.211075 0.057890 -0.017479 0.124350 0.032155 0.097972 -0.220727 0.148302 -0.019309 -0.098981 0.180954 -0.064003 -0.011532 0.148809 0.071048 0.002689 -0.310323 -0.272785 -0.213483 0.030733 -0.217041 -0.346220 0.031555 -0.209962 -0.303856 -0.218638 0.012904 0.188286 0.030006 0.090853 -0.374457'\n"
]
}
],
"source": [
"from smart_open import smart_open\n",
"# View the first 3 lines of the exported file\n",
"\n",
"# The first line has the total number of entries and the vector dimension count. \n",
"# The next lines have a key (a string) followed by its vector.\n",
"with open('/tmp/vectors.txt') as myfile:\n",
"with smart_open('/tmp/vectors.txt') as myfile:\n",
" for i in range(3):\n",
" print(myfile.readline().strip())"
]
Expand Down Expand Up @@ -900,7 +868,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
"version": "3.5.3"
}
},
"nbformat": 4,
Expand Down
Loading