{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ejemplo de chunker usando expresiones regulares."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nltk\n",
    "\n",
    "# Tagged example sentence: a list of (token, POS-tag) pairs. Multi-word\n",
    "# expressions are already joined with underscores inside the token text.\n",
    "ejemplo = [\n",
    "    ('El', 'da'), ('grupo', 'nc'), ('estatal', 'aq'), ('Electricité_de_France', 'np'),\n",
    "    ('-Fpa-', 'Fpa'), ('EDF', 'np'), ('-Fpt-', 'Fpt'),\n",
    "    ('anunció', 'vmi'), ('hoy', 'rg'), (',', 'Fc'), ('jueves', 'W'), (',', 'Fc'),\n",
    "    ('la', 'da'), ('compra', 'nc'), ('del', 'sp'), ('51_por_ciento', 'Zp'),\n",
    "    ('de', 'sp'), ('la', 'da'), ('empresa', 'nc'), ('mexicana', 'aq'),\n",
    "    ('Electricidad_Águila_de_Altamira', 'np'), ('-Fpa-', 'Fpa'), ('EAA', 'np'), ('-Fpt-', 'Fpt'), (',', 'Fc'),\n",
    "    ('creada', 'aq'), ('por', 'sp'), ('el', 'da'), ('japonés', 'aq'), ('Mitsubishi_Corporation', 'np'),\n",
    "    ('para', 'sp'), ('poner_en_marcha', 'vmn'),\n",
    "    ('una', 'di'), ('central', 'nc'), ('de', 'sp'), ('gas', 'nc'),\n",
    "    ('de', 'sp'), ('495', 'Z'), ('megavatios', 'nc'), ('.', 'Fp'),\n",
    "]\n",
    "\n",
    "# Chunk grammar for nltk.RegexpParser: each label (SN, SA, SV, SP) is followed\n",
    "# by one or more {...} chunk rules written as patterns over the POS tags.\n",
    "# NOTE(review): the labels presumably stand for noun / adjective / verb /\n",
    "# prepositional phrases -- confirm with the author.\n",
    "gramatica = r\"\"\"SN: {<d.*>? <aq>* <nc|np> <aq>*}\n",
    "                  {<Z.*>* <nc>* <W>*}\n",
    "              SA: {<aq>}\n",
    "              SV: {<v.*>*}\n",
    "              SP: {<sp>}\n",
    "           \"\"\"\n",
    "\n",
    "# Compile the grammar and chunk the sentence; later cells read `resultado`.\n",
    "gram_comp = nltk.RegexpParser(gramatica)\n",
    "resultado = gram_comp.parse(ejemplo)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(S\n",
      "  (SN El/da grupo/nc estatal/aq)\n",
      "  (SN Electricité_de_France/np)\n",
      "  -Fpa-/Fpa\n",
      "  (SN EDF/np)\n",
      "  -Fpt-/Fpt\n",
      "  (SV anunció/vmi)\n",
      "  hoy/rg\n",
      "  ,/Fc\n",
      "  (SN jueves/W)\n",
      "  ,/Fc\n",
      "  (SN la/da compra/nc)\n",
      "  (SP del/sp)\n",
      "  (SN 51_por_ciento/Zp)\n",
      "  (SP de/sp)\n",
      "  (SN la/da empresa/nc mexicana/aq)\n",
      "  (SN Electricidad_Águila_de_Altamira/np)\n",
      "  -Fpa-/Fpa\n",
      "  (SN EAA/np)\n",
      "  -Fpt-/Fpt\n",
      "  ,/Fc\n",
      "  (SA creada/aq)\n",
      "  (SP por/sp)\n",
      "  (SN el/da japonés/aq Mitsubishi_Corporation/np)\n",
      "  (SP para/sp)\n",
      "  (SV poner_en_marcha/vmn)\n",
      "  (SN una/di central/nc)\n",
      "  (SP de/sp)\n",
      "  (SN gas/nc)\n",
      "  (SP de/sp)\n",
      "  (SN 495/Z)\n",
      "  (SN megavatios/nc)\n",
      "  ./Fp)\n",
      "<class 'nltk.tree.tree.Tree'>\n"
     ]
    }
   ],
   "source": [
    "# Show the chunk tree in bracketed text form, followed by its Python type.\n",
    "print(resultado, type(resultado), sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render the chunk tree as text art directly in the cell output.\n",
    "# Tree.draw() opens a separate Tk window instead: it needs a display and holds\n",
    "# the cell until the window is closed, which breaks Restart & Run All.\n",
    "resultado.pretty_print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[('El', 'da', 'B-SN'), ('grupo', 'nc', 'I-SN'), ('estatal', 'aq', 'I-SN'), ('Electricité_de_France', 'np', 'B-SN'), ('-Fpa-', 'Fpa', 'O'), ('EDF', 'np', 'B-SN'), ('-Fpt-', 'Fpt', 'O'), ('anunció', 'vmi', 'B-SV'), ('hoy', 'rg', 'O'), (',', 'Fc', 'O'), ('jueves', 'W', 'B-SN'), (',', 'Fc', 'O'), ('la', 'da', 'B-SN'), ('compra', 'nc', 'I-SN'), ('del', 'sp', 'B-SP'), ('51_por_ciento', 'Zp', 'B-SN'), ('de', 'sp', 'B-SP'), ('la', 'da', 'B-SN'), ('empresa', 'nc', 'I-SN'), ('mexicana', 'aq', 'I-SN'), ('Electricidad_Águila_de_Altamira', 'np', 'B-SN'), ('-Fpa-', 'Fpa', 'O'), ('EAA', 'np', 'B-SN'), ('-Fpt-', 'Fpt', 'O'), (',', 'Fc', 'O'), ('creada', 'aq', 'B-SA'), ('por', 'sp', 'B-SP'), ('el', 'da', 'B-SN'), ('japonés', 'aq', 'I-SN'), ('Mitsubishi_Corporation', 'np', 'I-SN'), ('para', 'sp', 'B-SP'), ('poner_en_marcha', 'vmn', 'B-SV'), ('una', 'di', 'B-SN'), ('central', 'nc', 'I-SN'), ('de', 'sp', 'B-SP'), ('gas', 'nc', 'B-SN'), ('de', 'sp', 'B-SP'), ('495', 'Z', 'B-SN'), ('megavatios', 'nc', 'B-SN'), ('.', 'Fp', 'O')]]\n"
     ]
    }
   ],
   "source": [
    "# Flatten the chunk tree into (token, POS-tag, IOB-chunk-tag) triples and wrap\n",
    "# them in an outer list, so the value is a list holding this one sentence.\n",
    "oraciones_iob = [nltk.chunk.tree2conlltags(resultado)]\n",
    "print(oraciones_iob)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
