{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "#palabras:  43981\n",
      "# vector:  300\n",
      "[-3.45608e-02  9.21620e-02 -2.59729e-02  1.02635e-01 -2.01081e-02\n",
      " -1.74898e-02  6.78648e-02 -8.46215e-02 -4.58716e-02  3.93783e-02\n",
      "  1.02216e-01  8.79728e-02  1.23162e-01 -5.40405e-02 -2.95337e-02\n",
      "  6.19999e-02 -2.91148e-02  4.50337e-02  4.58716e-02 -6.49323e-02\n",
      "  1.92702e-02  2.47162e-02 -4.50337e-02 -2.54493e-02 -2.18885e-02\n",
      "  5.55067e-03 -4.14729e-02  6.07432e-02  1.97939e-02  1.13632e-02\n",
      "  3.33040e-02  7.85472e-03  4.54526e-02  1.15203e-02  3.07905e-02\n",
      " -1.37196e-02 -1.29341e-02  1.57094e-02  2.67060e-02  4.62905e-02\n",
      " -6.70269e-02  4.26773e-03  8.95438e-03  3.83310e-02  6.87026e-02\n",
      " -1.36148e-02  1.09757e-01  1.13946e-01 -1.88513e-02  2.57635e-02\n",
      " -6.61891e-02 -4.16824e-02 -5.08986e-02 -6.70269e-02  1.02216e-01\n",
      "  8.79728e-02  1.63378e-02 -9.97026e-02 -3.72837e-02 -6.40945e-02\n",
      "  4.52432e-02  5.82296e-02 -1.18135e-01 -7.62431e-02 -1.52905e-02\n",
      " -6.70269e-02 -3.66554e-02  1.02635e-01  7.64526e-03  6.36756e-02\n",
      " -8.79728e-02 -8.43073e-03  9.04864e-02  5.36215e-02  5.55067e-03\n",
      "  5.86486e-02  1.13946e-01 -2.70202e-02  6.15810e-02  1.85371e-02\n",
      " -1.87466e-02  2.57635e-02 -7.62431e-02 -5.48783e-02  3.47702e-02\n",
      " -7.28918e-02 -6.91215e-02  6.24188e-02  8.46215e-02  5.11080e-02\n",
      " -3.74932e-02 -6.95404e-02 -2.37736e-02 -1.68615e-02 -7.87566e-02\n",
      " -1.24105e-02 -2.80152e-03 -1.34054e-02 -8.08512e-02 -2.55540e-02\n",
      "  4.58716e-02 -5.52972e-02 -9.71891e-02 -3.49797e-02 -8.12702e-02\n",
      " -2.80675e-02  5.65540e-02  3.70743e-02  3.60270e-02 -4.75472e-02\n",
      "  1.50811e-02  6.91215e-02 -5.82296e-02  4.75472e-02  7.07972e-02\n",
      "  4.45101e-03 -2.06317e-02  9.58276e-03  2.78581e-02 -3.37229e-02\n",
      " -8.32600e-03  3.63935e-03  3.37229e-02 -3.62364e-02  2.01081e-02\n",
      "  2.84864e-02  4.46148e-02 -8.83918e-02  3.39324e-02  3.41418e-02\n",
      "  2.15743e-02  1.95844e-02 -6.49323e-02 -4.60810e-02  1.69662e-02\n",
      " -1.35730e-01  1.43480e-02  1.08919e-01 -3.28851e-02  8.53546e-03\n",
      " -8.25269e-02 -1.28189e-01  6.03242e-02 -1.17821e-02 -1.34054e-02\n",
      " -4.71283e-02  5.11080e-02  6.19999e-02  7.70810e-02  5.73918e-02\n",
      "  2.16162e-01  3.07905e-02 -3.95354e-03 -5.83867e-03 -4.73378e-02\n",
      " -4.33580e-02 -2.17314e-03 -3.45608e-02 -3.83310e-02 -1.61284e-02\n",
      " -3.51891e-02  3.69172e-03  3.45608e-02 -5.36215e-02 -7.24729e-02\n",
      "  6.61891e-02 -1.05855e-05 -2.46115e-02 -4.12635e-02 -1.13239e-03\n",
      " -1.78459e-01 -5.36215e-02  4.90134e-02 -6.15810e-02  9.63512e-02\n",
      "  8.90201e-03  1.69662e-02  5.82296e-02 -4.52432e-02  1.01378e-01\n",
      "  1.82229e-02  2.30405e-02 -9.13242e-02  4.69189e-02 -3.58699e-03\n",
      " -5.78107e-02  8.42026e-02 -2.63919e-02  8.88107e-02 -3.43513e-02\n",
      "  5.57161e-02 -3.16283e-02  1.72804e-03 -9.32093e-03  1.81182e-02\n",
      "  3.35135e-02 -1.59189e-02  5.29932e-02 -3.77026e-02 -5.65540e-02\n",
      " -6.45134e-02 -5.44594e-03  1.00017e-02  6.59796e-03 -1.86838e-01\n",
      " -2.15743e-02 -6.87026e-02  4.64999e-02 -8.50404e-02 -3.95354e-03\n",
      "  6.53513e-02  6.54560e-03 -6.70269e-02 -2.97432e-02  1.17821e-02\n",
      "  9.04864e-02 -2.07365e-02 -1.59189e-02 -4.85945e-02 -6.70269e-02\n",
      " -5.44594e-02  3.79121e-02  5.61351e-02  2.19932e-02  3.33040e-02\n",
      " -7.38344e-03  4.60810e-02 -3.24662e-02  1.00017e-02 -7.24729e-02\n",
      "  6.40945e-02  1.22534e-02  9.88647e-02 -4.41959e-02  9.42566e-02\n",
      " -1.09442e-02 -1.17297e-02  1.67567e-03  6.70269e-02 -3.41418e-02\n",
      "  8.50404e-02 -3.95878e-02 -8.54593e-02  2.54493e-02  6.30996e-03\n",
      " -4.06351e-02 -6.07432e-02 -6.74459e-02 -6.61891e-02 -5.69729e-02\n",
      "  4.27297e-02 -1.39290e-02  8.90201e-04  1.33007e-02  1.22534e-02\n",
      " -1.28189e-01 -8.33647e-02  2.07365e-02 -7.16350e-02 -2.60777e-02\n",
      "  2.70202e-02 -3.69172e-03 -1.18973e-01  1.64425e-02  4.18918e-03\n",
      "  3.62364e-02  1.43270e-01  2.78843e-03 -1.07871e-02 -5.73918e-02\n",
      " -4.12635e-02  2.56587e-02 -4.44053e-02  7.83377e-02  1.75946e-02\n",
      " -1.35101e-02 -6.57178e-03 -1.65892e-01 -1.05149e-01 -8.21080e-02\n",
      "  7.49864e-02 -3.91689e-02  1.27246e-02  6.80742e-03 -8.16891e-02\n",
      "  8.12702e-02 -6.74459e-02  3.95878e-02  3.89594e-02  9.73985e-03\n",
      "  1.21748e-03 -2.74392e-02 -7.28918e-02 -2.37736e-02  1.31436e-02\n",
      "  2.80675e-02 -7.62431e-02  5.65540e-02 -1.28294e-02  1.03159e-02]\n",
      "-0.6270861061348114\n",
      "------\n",
      "[ 2.48629972e-03  1.03602998e-01 -1.10154012e-02  1.91389710e-01\n",
      "  1.95145011e-02 -4.42351028e-02  1.31260991e-01 -1.03640392e-01\n",
      " -4.77660596e-02  2.52930075e-03  2.03253999e-01  1.06496394e-01\n",
      "  1.50105402e-01 -9.40593034e-02 -7.25241974e-02  1.05188593e-01\n",
      " -1.10341199e-01  5.07542193e-02  1.01343304e-01 -1.00592703e-01\n",
      "  1.02477700e-01  9.40558016e-02 -4.03037518e-02  4.42865044e-02\n",
      " -1.99197493e-02 -1.36298329e-01 -4.05504368e-02  1.35629907e-01\n",
      "  6.83315992e-02  1.16039999e-03  7.47095942e-02 -3.55320796e-02\n",
      "  6.16979003e-02  1.45662902e-02 -3.53792980e-02 -7.43421987e-02\n",
      "  7.97827989e-02 -4.69619967e-03  4.55266982e-02  9.70074981e-02\n",
      " -6.17273673e-02  5.61733283e-02  5.37279807e-02  1.78262983e-02\n",
      "  8.26696008e-02  4.50267009e-02  1.79492801e-01  1.02653600e-01\n",
      " -6.38229996e-02  1.18876696e-01 -1.13736197e-01 -9.12107006e-02\n",
      " -5.23411110e-02 -1.13187298e-01  1.88197106e-01  3.18039954e-03\n",
      " -2.60583982e-02 -8.18723962e-02 -4.22860608e-02 -1.28679395e-01\n",
      "  9.38469917e-03  4.19842973e-02 -7.49462992e-02 -9.68469009e-02\n",
      "  1.62095018e-02 -7.59299845e-03 -1.06787398e-01  1.86634898e-01\n",
      "  1.66098792e-02  8.01189989e-02 -1.62463307e-01  4.07012701e-02\n",
      "  1.25750497e-01  6.85790032e-02 -3.96191292e-02  2.02874601e-01\n",
      "  2.30435997e-01 -2.57262699e-02  1.31713003e-01  8.27257037e-02\n",
      "  1.46090053e-03 -4.19912003e-02 -1.37261897e-01  2.95179002e-02\n",
      "  5.71569987e-02 -5.68445995e-02 -5.40649034e-02  1.58701807e-01\n",
      "  1.51979893e-01  3.96173969e-02 -4.48729098e-02 -1.46408290e-01\n",
      " -2.93207690e-02 -7.90690035e-02 -8.59877318e-02  2.58252993e-02\n",
      "  6.17833845e-02  1.23492992e-02 -1.06209695e-01  3.56860086e-03\n",
      "  2.64565013e-02 -6.07205480e-02 -1.53453201e-01 -1.01165008e-02\n",
      " -1.69628605e-01 -6.19447976e-02  6.18040003e-02  5.96592017e-02\n",
      "  2.72109695e-02 -1.31943405e-01 -7.84282982e-02  1.38064906e-01\n",
      " -7.18993992e-02  2.92216986e-02  1.37759387e-01  4.34885779e-03\n",
      "  6.50979951e-03 -1.06247403e-02 -2.95947008e-02 -9.94964018e-02\n",
      " -5.38920015e-02 -2.52692006e-03  4.37771007e-02 -4.50524315e-02\n",
      " -4.06170078e-03  1.01609007e-02  1.11337006e-02 -1.43467203e-01\n",
      "  3.51767987e-02  6.74248040e-02  5.84232993e-02 -1.24641597e-01\n",
      " -1.43781304e-01 -8.74865949e-02 -3.69205996e-02 -1.47864401e-01\n",
      "  2.53928006e-02  1.08914770e-01 -4.41775024e-02  3.70637588e-02\n",
      " -2.26752907e-01 -2.72414982e-01  1.13022298e-01 -9.92479082e-03\n",
      " -2.40045004e-02  4.00415026e-02  1.18815973e-02  2.33677998e-02\n",
      "  6.80668503e-02  5.75844944e-02  3.80199999e-01 -4.13227007e-02\n",
      " -4.06044386e-02  5.47839291e-02 -1.30941510e-01 -9.56598967e-02\n",
      "  1.01637863e-01 -1.31636202e-01  5.25390357e-03 -4.56473008e-02\n",
      " -1.27509803e-01  4.98521216e-02  1.17372103e-01  2.56237015e-02\n",
      " -1.03972897e-01  7.08695203e-02 -1.24916853e-02 -1.09800100e-01\n",
      " -1.64002981e-02  2.02638097e-02 -2.66817391e-01 -3.76733989e-02\n",
      "  5.54520786e-02 -1.68561995e-01  1.03186101e-01  3.54492068e-02\n",
      "  2.39496902e-02  1.07956000e-01 -4.10085283e-02  2.01622993e-01\n",
      "  1.69351604e-02  4.66159992e-02 -8.43902454e-02  6.67302012e-02\n",
      "  3.08847111e-02 -5.26102334e-02  4.93347012e-02 -5.19484989e-02\n",
      "  1.80338889e-01  3.09209898e-03  8.52349997e-02 -6.82791993e-02\n",
      " -7.08798971e-03  6.31884709e-02 -9.04477984e-02  2.30502337e-04\n",
      "  3.08357999e-02 -2.30821967e-02 -4.06866819e-02 -6.44785240e-02\n",
      " -1.07503898e-01 -1.43610295e-02  9.67752039e-02 -1.08308040e-01\n",
      " -2.14177608e-01 -1.40442297e-01 -6.16200604e-02 -5.65190986e-02\n",
      " -1.05347000e-01 -4.53240238e-03  1.01605996e-01 -5.48694991e-02\n",
      "  5.08630276e-03 -1.74762189e-01 -1.20557897e-01  2.22033411e-01\n",
      " -6.19440004e-02  8.25090148e-03 -1.03669897e-01 -1.25272095e-01\n",
      " -1.04978204e-01  4.68701124e-04  6.62883967e-02  8.14270973e-02\n",
      "  8.32284987e-02  3.71085992e-03 -5.85229993e-02  1.34960003e-02\n",
      "  4.11055014e-02 -8.74304026e-02  1.48490697e-01  4.43476997e-02\n",
      "  1.19072199e-01  2.11814009e-02  1.02874525e-01  2.31312998e-02\n",
      " -8.03983957e-03 -7.67771304e-02  7.36141577e-02 -4.61276025e-02\n",
      "  1.13568693e-01 -6.10831007e-02  8.05009902e-03 -1.00129005e-02\n",
      " -1.04633033e-01 -8.77860039e-02 -1.27309203e-01 -9.41911936e-02\n",
      " -5.95523119e-02 -5.94616979e-02  5.57061024e-02  2.56936010e-02\n",
      "  5.71543016e-02  7.11496994e-02 -2.32088007e-02 -2.36754999e-01\n",
      " -3.64119038e-02  1.01870010e-02 -6.38095289e-02  7.61482939e-02\n",
      "  4.56428006e-02  5.29685766e-02 -2.19218001e-01 -2.10008994e-02\n",
      "  4.30193767e-02  1.34534016e-02  1.58029407e-01  2.97318306e-02\n",
      " -2.97068991e-02 -3.31229009e-02 -5.59985638e-05  6.63980097e-03\n",
      " -7.55630061e-03  1.63130105e-01 -1.41035002e-02 -1.15736097e-01\n",
      "  1.17843226e-01 -2.93476999e-01 -1.29814103e-01 -2.08108008e-01\n",
      "  2.74393000e-02 -5.96735999e-02 -4.27470990e-02  1.09033413e-01\n",
      " -1.59349397e-01  8.10720921e-02 -1.05285496e-01  1.19229302e-01\n",
      "  1.00374497e-01  1.30984843e-01 -4.25655209e-02 -3.71467397e-02\n",
      " -1.32722005e-01 -5.09150997e-02  5.57378978e-02  4.78919968e-03\n",
      " -1.19233601e-01  5.14773503e-02  2.40195990e-02  5.90516999e-02]\n"
     ]
    }
   ],
   "source": [
    "# Word embeddings\n",
    "import gensim\n",
    "from nltk.data import find\n",
    "\n",
    "# Cargar el modelo de embeding pre-entrenados del NLTK\n",
    "word2vec_sample = str(find('models/word2vec_sample/pruned.word2vec.txt'))\n",
    "model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_sample, binary=False)\n",
    "\n",
    "print(\"#palabras: \", len(model))\n",
    "\n",
    "# Tamaño del vector\n",
    "print (\"# vector: \",len(model['class']))\n",
    "print (model['class'])\n",
    "print (sum(model['class']))\n",
    "a=[]\n",
    "a.append(model['class'])\n",
    "a.append(model['school'])\n",
    "print(\"------\")\n",
    "print(a[0]+a[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('soccer', 0.7313547134399414),\n",
       " ('Football', 0.7124834656715393),\n",
       " ('basketball', 0.6682467460632324),\n",
       " ('athletics', 0.6265193223953247),\n",
       " ('baseball', 0.6161999106407166),\n",
       " ('sports', 0.5927177667617798),\n",
       " ('coaches', 0.579153835773468),\n",
       " ('athletic', 0.5767539739608765),\n",
       " ('Soccer', 0.5632802248001099),\n",
       " ('hockey', 0.5496212244033813)]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Buscar n palabras similares a una dada\n",
    "model.most_similar(positive=['football'], topn = 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'house'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Palabras que no son de un dominio temático\n",
    "\n",
    "model.doesnt_match(['breakfast','house', 'dinner', 'lunch'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Mikolov et al. (2013) figured out that word embedding captures much of syntactic and semantic regularities. \n",
    "#For example, the vector 'King - Man + Woman' is close to 'Queen' and 'Germany - Berlin + Paris' is close to 'France'."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('queen', 0.7118193507194519)]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.most_similar(positive=['woman','king'], negative=['man'], topn = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('France', 0.7884091734886169)]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "model.most_similar(positive=['Paris','Germany'], negative=['Berlin'], topn = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.054031882"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.similarity('woman','bad')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.12128081"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.similarity('man','bad')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7190051"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.similarity('good','bad')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.22174425"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.similarity('handsome','ugly')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.44486597"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.similarity('handsome','beautiful')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('queen', 0.7118193507194519),\n",
       " ('monarch', 0.6189674139022827),\n",
       " ('princess', 0.5902430415153503),\n",
       " ('prince', 0.5377321243286133),\n",
       " ('kings', 0.5236843228340149),\n",
       " ('queens', 0.5181134939193726),\n",
       " ('throne', 0.5005807280540466),\n",
       " ('royal', 0.493820458650589),\n",
       " ('ruler', 0.49092739820480347),\n",
       " ('princes', 0.481081485748291)]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.most_similar(positive=['king','woman'],negative=['man'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.2596795"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.similarity('chocolate','salt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.42128563"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.similarity('chocolate','sweet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fi\n"
     ]
    }
   ],
   "source": [
    "print (\"fi\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
