From 38ba8904e67ebc1188b60bdc0add1d10ce66c820 Mon Sep 17 00:00:00 2001 From: tink Date: Fri, 7 Jun 2024 16:05:44 +0800 Subject: [PATCH] add jupyter --- Makefile | 7 + .../Go-Frameworks-Github-Fork-Stats.ipynb | 232 + docs/jupyter/Pandas完全指南.ipynb | 8346 +++++++++++++++++ .../Spark上手示例1:RDD操作.ipynb | 610 ++ ...Spark上手示例2:DataFrame操作.ipynb | 1028 ++ docs/language/Go.md | 164 + mkdocs.yml | 17 +- 7 files changed, 10401 insertions(+), 3 deletions(-) create mode 100644 docs/jupyter/Go-Frameworks-Github-Fork-Stats.ipynb create mode 100644 docs/jupyter/Pandas完全指南.ipynb create mode 100644 docs/jupyter/Spark上手示例1:RDD操作.ipynb create mode 100644 docs/jupyter/Spark上手示例2:DataFrame操作.ipynb diff --git a/Makefile b/Makefile index 2dd6abb..ab9b3a2 100644 --- a/Makefile +++ b/Makefile @@ -13,3 +13,10 @@ html: publish: ssh root@www.cyub.vip "cd ${HTML_OUTPUT}; git pull" +plugin: + pip install mkdocs-git-revision-date-localized-plugin # 显示文档编辑时间 + pip install mkdocs-mermaid2-plugin # mermaid图表支持 + pip install mkdocs-charts-plugin # image box + pip install mkdocs-print-site-plugin # print site + pip install mkdocs-jupyter # jupyter + diff --git a/docs/jupyter/Go-Frameworks-Github-Fork-Stats.ipynb b/docs/jupyter/Go-Frameworks-Github-Fork-Stats.ipynb new file mode 100644 index 0000000..b15f6d9 --- /dev/null +++ b/docs/jupyter/Go-Frameworks-Github-Fork-Stats.ipynb @@ -0,0 +1,232 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING: pip is being invoked by an old script wrapper. 
This will fail in a future version of pip.\n", + "Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.\n", + "To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.\n", + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: pandas in /home/deploy/.local/lib/python3.6/site-packages (1.0.0)\n", + "Requirement already satisfied: matplotlib in /home/deploy/.local/lib/python3.6/site-packages (3.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.6.1 in /home/deploy/.local/lib/python3.6/site-packages (from pandas) (2.8.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /home/deploy/.local/lib/python3.6/site-packages (from pandas) (2019.3)\n", + "Requirement already satisfied: numpy>=1.13.3 in /home/deploy/.local/lib/python3.6/site-packages (from pandas) (1.18.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /home/deploy/.local/lib/python3.6/site-packages (from matplotlib) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /home/deploy/.local/lib/python3.6/site-packages (from matplotlib) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /home/deploy/.local/lib/python3.6/site-packages (from matplotlib) (2.4.6)\n", + "Requirement already satisfied: six>=1.5 in /home/deploy/.local/lib/python3.6/site-packages (from python-dateutil>=2.6.1->pandas) (1.14.0)\n", + "Requirement already satisfied: setuptools in /home/deploy/.local/lib/python3.6/site-packages (from kiwisolver>=1.0.1->matplotlib) (45.1.0)\n" + ] + } + ], + "source": [ + "!pip install pandas matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
forksstarswatchsopenIssues
Gin4074354551212242
Beego4688232431268813
Iris1942175076835
Revel13571157555887
Echo15081650055146
Buffalo430537217170
\n", + "
" + ], + "text/plain": [ + " forks stars watchs openIssues\n", + "Gin 4074 35455 1212 242\n", + "Beego 4688 23243 1268 813\n", + "Iris 1942 17507 683 5\n", + "Revel 1357 11575 558 87\n", + "Echo 1508 16500 551 46\n", + "Buffalo 430 5372 171 70" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 统计go框架fork次数信息\n", + "\n", + "frameworks = {\n", + " \"Gin\":\"gin-gonic/gin\",\n", + " \"Beego\": \"astaxie/beego\",\n", + " \"Iris\": \"kataras/iris\",\n", + " \"Revel\": \"revel/revel\",\n", + " \"Echo\": \"labstack/echo\",\n", + " \"Buffalo\": \"gobuffalo/buffalo\"\n", + "}\n", + "\n", + "\n", + "stats = {}\n", + "for name in frameworks.keys():\n", + " url = \"https://api.github.com/repos/\" + frameworks[name]\n", + " stats[name] = requests.get(url=url).json() # 获取仓库统计信息\n", + "\n", + "indexs = []\n", + "forks = []\n", + "stars = []\n", + "watchs = []\n", + "openIssues = []\n", + "\n", + "for name in stats:\n", + " indexs += [name]\n", + " forks += [stats[name]['forks_count']] # fork次数\n", + " stars += [stats[name]['watchers_count']] # star次数\n", + " watchs += [stats[name]['subscribers_count']] # watch次数\n", + " openIssues += [stats[name]['open_issues_count']] # open_issue次数\n", + "\n", + "df = pd.DataFrame({\n", + " 'forks':forks,\n", + " 'stars':stars,\n", + " 'watchs':watchs,\n", + " 'openIssues': openIssues\n", + "}, index = indexs)\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA3kAAAHrCAYAAABywVS0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3dfbhedXkn+u/NO/IOiYwQpjAt8v6SEBAbQ3k5YiieSqm2dECgqEwVS22nDDjjGZAWa49ombSKg4riVEXEUlFQjBoLzBEl0MhbcIyAJUAxBQwIxRL4nT/2It2GHZKdvcOz98rnc13Ptde611q/537iI9nfrLV+q1prAQAAoB82GHQDAAAAjB8hDwAAoEeEPAAAgB4R8gAAAHpEyAMAAOiRjQbdwNqaMmVK23XXXQfdBgAAwEDccsst/9xam7pyfdKGvF133TULFiwYdBsAAAADUVU/Hqnuck0AAIAeEfIAAAB6RMgDAADokUl7Tx4AADA4zzzzTJYsWZKnn3560K303mabbZZp06Zl4403XqP9hTwAAGDUlixZkq222iq77rprqmrQ7fRWay2PPPJIlixZkt12222NjnG5JgAAMGpPP/10dthhBwFvHauq7LDDDqM6YyrkAQAAa0XAe2mM9s9ZyAMAAOgR9+QBAABjtus514zrePe9/9jV7jN37txcfPHFmTFjRj7zmc+sdv9PfepTWbBgQf76r/96PFqcsIQ8AABgUvrIRz6Sb3zjG5k2bdpq912+fPlL0NHE4HJNAABg0vn93//93HPPPTnmmGPywQ9+MMcdd1z233//HHroobntttuSJOedd17e/OY3Z9asWXnzm9/8C8dfc801efWrX51//ud/zhe+8IXsu+++OeCAA3LYYYcN4uOMKyEPAACYdD760Y9mp512yvz583Pfffdl+vTpue222/K+970vJ5988or97rrrrnzjG9/I5z73uRW1q666Ku9///tz7bXXZsqUKTn//PNz3XXX5fvf/36uvvrqQXycceVyTQAAYFK78cYb88UvfjFJcuSRR+aRRx7J448/niT5jd/4jWy++eYr9v3Wt76VBQsW5Otf/3q23nrrJMmsWbNy6qmn5rd/+7dz/PHHv/QfYJw5kwcAAPTWFlts8Qvrv/zLv5wnnngi/+f//J8VtY9+9KP5sz/7s9x///056KCD8sgjj7zUbY4rIQ8AAJjUZs+evWJ2zW9/+9uZMmXKirN0K/ulX/qlfPGLX8zJJ5+cO++8M0nyox/9KK961aty/vnnZ+rUqbn//vtfst7XBZdrAgAAY7YmjzxYV84777ycdtpp2X///fOyl70sl1122Yvuv+eee+Yzn/lM3vSmN+XLX/5yzjrrrPzwhz9May1HHXVUDjjggJeo83WjWmuD7mGtzJw5sy1YsGDQbQAAwHpp0aJF2WuvvQbdxnpjpD/vqrqltTZz5X1drgkAANAjQh4AAECPuCdvojhvm3EYY9nYxwAAACY1Z/IAAAB6ZLUhr6o2q6rvVdX3q+rOqnpvV/9UVd1bVQu714FdvapqblUtrqrbqmrGsLFOqaofdq9ThtUPqqrbu2PmVlWtiw8LAADQd2tyuebPkxzZWvtZVW2c5Maq+mq37azW2pUr7X9Mkt2716uSXJzkVVW1fZJzk8xM0pLcUlVXt9Ye6/Z5W5LvJrk2yZwkXw0AAACjstqQ14aesfCzbnXj7vViz114Q5JPd8fdVFXbVtUrkhyeZF5r7dEkqap5SeZU1beTbN1au6mrfzrJcRHyAABg8hiPOSZ+YbzRzzdx0UUX5fTTT8/LXvay8e1lklmje/KqasOqWpjkJxkKat/tNl3QXZL5l1W1aVfbOcnwR8Qv6WovVl8yQn2kPk6vqgVVtWDp0qVr0joAALCeuOiii/LUU0+N6phnn312HXUzOGsU8lprz7bWDkwyLckhVbVvkncn2TPJwUm2T3L2Ouvy3/q4pLU
2s7U2c+rUqev67QAAgAnqySefzLHHHpsDDjgg++67b9773vfmwQcfzBFHHJEjjjgiSfL2t789M2fOzD777JNzzz13xbG77rprzj777MyYMSNf+MIXMnfu3Oy9997Zf//9c8IJJwzqI42bUT1CobX206qan2ROa+3Crvzzqvpkkj/p1h9Issuww6Z1tQcydMnm8Pq3u/q0EfYHAAAY0de+9rXstNNOueaaa5Iky5Ytyyc/+cnMnz8/U6ZMSZJccMEF2X777fPss8/mqKOOym233Zb9998/SbLDDjvk1ltvTZLstNNOuffee7Ppppvmpz/96WA+0Dhak9k1p1bVtt3y5klem+Tu7j67dDNhHpfkju6Qq5Oc3M2yeWiSZa21h5Jcl+ToqtquqrZLcnSS67ptj1fVod1YJyf50vh+TAAAoE/222+/zJs3L2effXZuuOGGbLPNC+8JvOKKKzJjxoxMnz49d955Z+66664V237nd35nxfL++++fE088MX/zN3+TjTaa/I8SX5NP8Iokl1XVhhkKhVe01r5SVd+qqqlJKsnCJL/f7X9tkl9PsjjJU0l+L0laa49W1Z8mubnb7/znJ2FJ8o4kn0qyeYYmXDHpCgAAsEqvfOUrc+utt+baa6/Ne97znhx11FG/sP3ee+/NhRdemJtvvjnbbbddTj311Dz99NMrtm+xxRYrlq+55ppcf/31+fKXv5wLLrggt99++6QOe2syu+ZtSaaPUD9yFfu3JGesYtulSS4dob4gyb6r6wUAACBJHnzwwWy//fY56aSTsu222+bjH/94ttpqqzzxxBOZMmVKHn/88WyxxRbZZptt8vDDD+erX/1qDj/88BeM89xzz+X+++/PEUcckde85jW5/PLL87Of/SzbbrvtS/+hxsnkjacAAMDEsRaPPBiL22+/PWeddVY22GCDbLzxxrn44ovzne98J3PmzMlOO+2U+fPnZ/r06dlzzz2zyy67ZNasWSOO8+yzz+akk07KsmXL0lrLmWeeOakDXpLU0Im3yWfmzJltwYIFg25j/IzHc0Ve4v9jAQCw/lq0aFH22muvQbex3hjpz7uqbmmtzVx53zV6hAIAAACTg5AHAADQI0IeAABAjwh5AAAAPSLkAQAA9IiQBwAA0COekwcAAIzZfpftN67j3X7K7eM63vMuuuiinH766XnZy162yn3OO++8bLnllvmTP/mTddLDuuZMHgAAsN646KKL8tRTTw26jXVKyAMAACadD3zgA5k7d26S5I/+6I9y5JFHJkm+9a1v5cQTT8zb3/72zJw5M/vss0/OPffcJMncuXPz4IMP5ogjjsgRRxyRJPna176WGTNm5IADDshRRx21Yvy77rorhx9+eP7Df/gPK97nySefzLHHHpsDDjgg++67bz7/+c+/lB95jblcEwAAmHRmz56dD37wgznzzDOzYMGC/PznP88zzzyTG264IYcddlje9KY3Zfvtt8+zzz6bo446KrfddlvOPPPMfOhDH8r8+fMzZcqULF26NG9729ty/fXXZ7fddsujjz66Yvy777478+fPzxNPPJE99tgjb3/72/O1r30tO+20U6655pokybJlywb18V+UM3kAAMCkc9BBB+WWW27J448/nk033TSvfvWrs2DBgtxwww2ZPXt2rrjiisyYMSPTp0/PnXfembvuuusFY9x000057LDDsttuuyVJtt9++xXbjj322Gy66aaZMmVKXv7yl+fhhx/Ofvvtl3nz5uXss8/ODTfckG222eYl+7yjIeQBAACTzsYbb5zddtstn/rUp/Krv/qrmT17dubPn5/Fixdn8803z4UXXphvfvObue2223Lsscfm6aefHtX4m2666YrlDTfcMMuXL88rX/nK3Hrrrdlvv/3ynve8J+eff/54f6xxIeQBAACT0uzZs3PhhRfmsMMOy+zZs/PRj34006dPz+OPP54tttgi22yzTR5++OF89atfXXHMVlttlSeeeCJJcui
hh+b666/PvffemyS/cLnmSB588MG87GUvy0knnZSzzjort95667r7cGPgnjwAAGDM1tUjD17M7Nmzc8EFF+TVr351tthii2y22WaZPXt2DjjggEyfPj177rlndtlll8yaNWvFMaeffnrmzJmTnXbaKfPnz88ll1yS448/Ps8991xe/vKXZ968eat8v9tvvz1nnXVWNthgg2y88ca5+OKLX4qPOWrVWht0D2tl5syZbcGCBYNuY/ycNw7X8543MW/8BACgfxYtWpS99tpr0G2sN0b6866qW1prM1fe1+WaAAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQAAAD0i5AEAAPSI5+QBAABjtmjP8X2cwl53LxrX8Ubr29/+di688MJ85StfGWgfa8OZPAAAgB4R8gAAgEnpQx/6UPbdd9/su+++ueiii3Lfffdlzz33zIknnpi99torb3zjG/PUU08lSW655Zb82q/9Wg466KC87nWvy0MPPZQkOfzww3P22WfnkEMOyStf+crccMMNL3ifv//7v8+BBx6YAw88MNOnT88TTzyRhx56KIcddlgOPPDA7LvvviuO23LLLVccd+WVV+bUU09NkixdujS/9Vu/lYMPPjgHH3xw/vf//t+rHHushDwAAGDSueWWW/LJT34y3/3ud3PTTTflYx/7WB577LH84Ac/yDve8Y4sWrQoW2+9dT7ykY/kmWeeyR/8wR/kyiuvzC233JLTTjst/+2//bcVYy1fvjzf+973ctFFF+W9733vC97rwgsvzIc//OEsXLgwN9xwQzbffPN89rOfzete97osXLgw3//+93PggQe+aL9/+Id/mD/6oz/KzTffnC9+8Yt561vfusqxx8o9eQAAwKRz44035jd/8zezxRZbJEmOP/743HDDDdlll10ya9asJMlJJ52UuXPnZs6cObnjjjvy2te+Nkny7LPP5hWveMWKsY4//vgkyUEHHZT77rvvBe81a9as/PEf/3FOPPHEHH/88Zk2bVoOPvjgnHbaaXnmmWdy3HHHrTbkfeMb38hdd921Yv3xxx/Pz372sxHHHitn8gAAgN6oqhest9ayzz77ZOHChVm4cGFuv/32fP3rX1+xz6abbpok2XDDDbN8+fIXjHnOOefk4x//eP7lX/4ls2bNyt13353DDjss119/fXbeeeeceuqp+fSnP/2C93/66adXLD/33HO56aabVvTwwAMPZMsttxxx7LES8gAAgEln9uzZ+bu/+7s89dRTefLJJ3PVVVdl9uzZ+cd//Md85zvfSZJ89rOfzWte85rsscceWbp06Yr6M888kzvvvHON3+tHP/pR9ttvv5x99tk5+OCDc/fdd+fHP/5xdtxxx7ztbW/LW9/61tx6661Jkh133DGLFi3Kc889l6uuumrFGEcffXT+6q/+asX6woULVzn2WLlcEwAAGLOX+pEHM2bMyKmnnppDDjkkSfLWt7412223XfbYY498+MMfzmmnnZa99947b3/727PJJpvkyiuvzJlnnplly5Zl+fLlede73pV99tlnjd7roosuyvz587PBBhtkn332yTHHHJPLL788H/jAB7Lxxhtnyy23XHEm7/3vf39e//rXZ+rUqZk5c2Z+9rOfJUnmzp2bM844I/vvv3+WL1+eww47LB/96EdHHHusqrU25kEGYebMmW3BggWDbmP8nLfNOIyxbOxjAADAGli0aFH22mt8n403Vvfdd19e//rX54477hh0K+NupD/vqrqltTZz5X1drgkAANAjQh4AANALu+66ay/P4o2WkAcAAKyVyXrr12Qz2j9nIQ8AABi1zTbbLI888oigt4611vLII49ks802W+NjzK4JAACM2rRp07JkyZIsXbp00K303mabbTaqh6QLeQAAwKhtvPHG2W233QbdBiNwuSYAAECPCHkAAAA9IuQBAAD0iJAHAADQI0IeAABAjwh5AAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQA
AAD0i5AEAAPSIkAcAANAjqw15VbVZVX2vqr5fVXdW1Xu7+m5V9d2qWlxVn6+qTbr6pt364m77rsPGendX/0FVvW5YfU5XW1xV54z/xwQAAFg/rMmZvJ8nObK1dkCSA5PMqapDk/xFkr9srf1KkseSvKXb/y1JHuvqf9ntl6raO8kJSfZJMifJR6pqw6raMMmHkxyTZO8kv9vtCwAAwCitNuS1IT/rVjfuXi3JkUmu7OqXJTmuW35Dt55u+1FVVV398tbaz1tr9yZZnOSQ7rW4tXZPa+1fk1ze7QsAAMAordE9ed0Zt4VJfpJkXpIfJflpa215t8uSJDt3yzsnuT9Juu3LkuwwvL7SMauqj9TH6VW1oKoWLF26dE1aBwAAWK+sUchrrT3bWjswybQMnXnbc512teo+LmmtzWytzZw6deogWgAAAJjQRjW7Zmvtp0nmJ3l1km2raqNu07QkD3TLDyTZJUm67dskeWR4faVjVlUHAABglNZkds2pVbVtt7x5ktcmWZShsPfGbrdTknypW766W0+3/VuttdbVT+hm39wtye5Jvpfk5iS7d7N1bpKhyVmuHo8PBwAAsL7ZaPW75BVJLutmwdwgyRWtta9U1V1JLq+qP0vyD0k+0e3/iST/q6oWJ3k0Q6EtrbU7q+qKJHclWZ7kjNbas0lSVe9Mcl2SDZNc2lq7c9w+IQAAwHpktSGvtXZbkukj1O/J0P15K9efTvKmVYx1QZILRqhfm+TaNegXAACAFzGqe/IAAACY2IQ8AACAHhHyAAAAekTIAwAA6BEhDwAAoEeEPAAAgB4R8gAAAHpEyAMAAOgRIQ8AAKBHhDwAAIAeEfIAAAB6RMgDAADoESEPAACgR4Q8AACAHhHyAAAAekTIAwAA6BEhDwAAoEeEPAAAgB4R8gAAAHpEyAMAAOgRIQ8AAKBHhDwAAIAeEfIAAAB6RMgDAADoESEPAACgR4Q8AACAHhHyAAAAekTIAwAA6BEhDwAAoEeEPAAAgB4R8gAAAHpEyAMAAOgRIQ8AAKBHhDwAAIAeEfIAAAB6RMgDAADoESEPAACgR4Q8AACAHhHyAAAAekTIAwAA6BEhDwAAoEeEPAAAgB4R8gAAAHpEyAMAAOgRIQ8AAKBHhDwAAIAeEfIAAAB6RMgDAADoESEPAACgR1Yb8qpql6qaX1V3VdWdVfWHXf28qnqgqhZ2r18fdsy7q2pxVf2gql43rD6nqy2uqnOG1Xerqu929c9X1Sbj/UEBAADWB2tyJm95kv/cWts7yaFJzqiqvbttf9laO7B7XZsk3bYTkuyTZE6Sj1TVhlW1YZIPJzkmyd5JfnfYOH/RjfUrSR5L8pZx+nwAAADrldWGvNbaQ621W7vlJ5IsSrLzixzyhiSXt9Z+3lq7N8niJId0r8WttXtaa/+a5PIkb6iqSnJkkiu74y9LctzafiAAAID12ajuyauqXZNMT/LdrvTOqrqtqi6tqu262s5J7h922JKutqr6Dkl+2lpbvlJ9pPc/vaoWVNWCpUuXjqZ1AACA9cIah7yq2jLJF5O8q7X2eJKLk/xykgOTPJTkg+ukw2Faa5e01ma21mZOnTp1Xb8dAADApLPRmuxUVRtnKOB9prX2t0nSWnt42PaPJflKt/pAkl2GHT6tq2UV9UeSbFtVG3Vn84bvDwAAwCisyeyaleQTSRa11j40rP6KYbv9ZpI7uuWrk5xQVZtW1W5Jdk/yvSQ3J9m9m0lzkwxNznJ1a60lmZ/kjd3xpyT50tg+FgAAwPppTc7kzUry5iS3V9XCrvZfMzQ75oFJWpL7kvynJGmt3VlVVyS5K0Mzc57RWns2SarqnUmuS7Jhkktba3d2452d5PKq+rMk/5ChUAkAAMAorTbktdZuTFIjbLr2RY65IMkFI9SvHem41to9GZp9EwAAgDEY1eyaAAAATGxCHgAAQI8IeQAAAD0i5AEAAPSIkAcAANAjQh4AAECPCHkAAAA9IuQBAAD0iJA
HAADQI0IeAABAjwh5AAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQAAAD2y0aAbANbCeduMwxjLxj4GAAATjjN5AAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQAAAD0i5AEAAPSIkAcAANAjQh4AAECPCHkAAAA9IuQBAAD0iJAHAADQI0IeAABAjwh5AAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQAAAD0i5AEAAPSIkAcAANAjQh4AAECPCHkAAAA9IuQBAAD0iJAHAADQI0IeAABAjwh5AAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQAAAD0i5AEAAPSIkAcAANAjQh4AAECPCHkAAAA9stqQV1W7VNX8qrqrqu6sqj/s6ttX1byq+mH3c7uuXlU1t6oWV9VtVTVj2FindPv/sKpOGVY/qKpu746ZW1W1Lj4sAABA363JmbzlSf5za23vJIcmOaOq9k5yTpJvttZ2T/LNbj1Jjkmye/c6PcnFyVAoTHJuklclOSTJuc8Hw26ftw07bs7YPxoAAMD6Z7Uhr7X2UGvt1m75iSSLkuyc5A1JLut2uyzJcd3yG5J8ug25Kcm2VfWKJK9LMq+19mhr7bEk85LM6bZt3Vq7qbXWknx62FgAAACMwqjuyauqXZNMT/LdJDu21h7qNv1Tkh275Z2T3D/ssCVd7cXqS0aoAwAAMEprHPKqasskX0zyrtba48O3dWfg2jj3NlIPp1fVgqpasHTp0nX9dgAAAJPOGoW8qto4QwHvM621v+3KD3eXWqb7+ZOu/kCSXYYdPq2rvVh92gj1F2itXdJam9lamzl16tQ1aR0AAGC9siaza1aSTyRZ1Fr70LBNVyd5fobMU5J8aVj95G6WzUOTLOsu67wuydFVtV034crRSa7rtj1eVYd273XysLEAAAAYhY3WYJ9ZSd6c5PaqWtjV/muS9ye5oqrekuTHSX6723Ztkl9PsjjJU0l+L0laa49W1Z8mubnb7/zW2qPd8juSfCrJ5km+2r0AAAAYpdWGvNbajUlW9dy6o0bYvyU5YxVjXZrk0hHqC5Lsu7peAAAAeHGjml0TAACAiU3IAwAA6BEhDwAAoEeEPAAAgB4R8gAAAHpEyAMAAOgRIQ8AAKBHhDwAAIAeEfIAAAB6RMgDAADoESEPAACgR4Q8AACAHhHyAAAAekTIAwAA6BEhDwAAoEeEPAAAgB4R8gAAAHpEyAMAAOgRIQ8AAKBHhDwAAIAeEfIAAAB6RMgDAADoESEPAACgR4Q8AACAHtlo0A0AsA6dt804jLFs7GMAAC8ZZ/IAAAB6RMgDAADoESEPAACgR4Q8AACAHhHyAAAAekTIAwAA6BEhDwAAoEeEPAAAgB4R8gAAAHpEyAMAAOgRIQ8AAKBHNhp0AwAATDLnbTMOYywb+xjAiJzJAwAA6BEhDwAAoEeEPAAAgB4R8gAAAHpEyAMAAOgRIQ8AAKBHhDwAAIAeEfIAAAB6RMgDAADoESEPAACgR4Q8AACAHhHyAAAAekTIAwAA6JHVhryqurSqflJVdwyrnVdVD1TVwu7168O2vbuqFlfVD6rqdcPqc7ra4qo6Z1h9t6r6blf/fFVtMp4fEAAAYH2yJmfyPpVkzgj1v2ytHdi9rk2Sqto7yQlJ9umO+UhVbVhVGyb5cJJjkuyd5He7fZPkL7qxfiXJY0neMpYPBAAAsD5bbchrrV2f5NE1HO8NSS5vrf28tXZvksVJDulei1tr97TW/jXJ5UneUFWV5MgkV3bHX5bkuFF+BgAAADpjuSfvnVV1W3c553Zdbeck9w/bZ0lXW1V9hyQ/ba0tX6k+oqo6vaoWVNWCpUuXjqF1AACAflrbkHdxkl9OcmCSh5J8cNw6ehGttUtaazNbazOnTp36UrwlAADApLLR2hzUWnv4+eWq+liSr3SrDyTZZdiu07paVlF/JMm2VbVRdzZv+P4AAACM0lqdyauqVwxb/c0kz8+8eXWSE6pq06r
aLcnuSb6X5OYku3czaW6SoclZrm6ttSTzk7yxO/6UJF9am54AAABYgzN5VfW5JIcnmVJVS5Kcm+TwqjowSUtyX5L/lCSttTur6ookdyVZnuSM1tqz3TjvTHJdkg2TXNpau7N7i7OTXF5Vf5bkH5J8Ytw+HQAAwHpmtSGvtfa7I5RXGcRaaxckuWCE+rVJrh2hfk+GZt8EAABgjMYyuyYAAAATjJAHAADQI0IeAABAjwh5AAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQAAAD0i5AEAAPSIkAcAANAjQh4AAECPCHkAAAA9IuQBAAD0iJAHAADQI0IeAABAjwh5AAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQAAAD0i5AEAAPSIkAcAANAjQh4AAECPCHkAAAA9IuQBAAD0iJAHAADQI0IeAABAjwh5AAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQAAAD0i5AEAAPSIkAcAANAjGw26AQBggjhvm3EYY9nYxwBgTJzJAwAA6BEhDwAAoEeEPAAAgB4R8gAAAHpEyAMAAOgRIQ8AAKBHhDwAAIAeEfIAAAB6RMgDAADoESEPAACgR4Q8AACAHhHyAAAAekTIAwAA6BEhDwAAoEdWG/Kq6tKq+klV3TGstn1VzauqH3Y/t+vqVVVzq2pxVd1WVTOGHXNKt/8Pq+qUYfWDqur27pi5VVXj/SEBAADWF2tyJu9TSeasVDsnyTdba7sn+Wa3niTHJNm9e52e5OJkKBQmOTfJq5IckuTc54Nht8/bhh238nsBAACwhlYb8lpr1yd5dKXyG5Jc1i1fluS4YfVPtyE3Jdm2ql6R5HVJ5rXWHm2tPZZkXpI53batW2s3tdZakk8PGwsAAIBRWtt78nZsrT3ULf9Tkh275Z2T3D9svyVd7cXqS0aoj6iqTq+qBVW1YOnSpWvZOgAAQH+NeeKV7gxcG4de1uS9LmmtzWytzZw6depL8ZYAAACTytqGvIe7Sy3T/fxJV38gyS7D9pvW1V6sPm2EOgAAAGthbUPe1UmenyHzlCRfGlY/uZtl89Aky7rLOq9LcnRVbddNuHJ0kuu6bY9X1aHdrJonDxsLAACAUdpodaMiEJ0AABAHSURBVDtU1eeSHJ5kSlUtydAsme9PckVVvSXJj5P8drf7tUl+PcniJE8l+b0kaa09WlV/muTmbr/zW2vPT+byjgzN4Ll5kq92LwAAANbCakNea+13V7HpqBH2bUnOWMU4lya5dIT6giT7rq4PAAAAVm/ME68AAAAwcQh5AAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQAAAD0i5AEAAPSIkAcAANAjQh4AAECPCHkAAAA9IuQBAAD0iJAHAADQI0IeAABAjwh5AAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQAAAD0i5AEAAPSIkAcAANAjQh4AAECPCHkAAAA9IuQBAAD0iJAHAADQI0IeAABAjwh5AAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQAAAD0i5AEAAPSIkAcAANAjQh4AAECPCHkAAAA9IuQBAAD0iJAHAADQI0IeAABAjwh5AAAAPSLkAQAA9IiQBwAA0CNCHgAAQI8IeQAAAD0i5AEAAPSIkAcAANAjQh4AAECPCHkAAAA9IuQBAAD0iJAHAADQI0IeAABAj2w06AYAAIAeO2+bcRhj2djHWI+M6UxeVd1XVbdX1cKqWtDVtq+qeVX1w+7ndl29qmpuVS2uqtuqasawcU7p9v9hVZ0yto8EAACw/hqPyzWPaK0d2Fqb2a2fk+SbrbXdk3yzW0+SY5Ls3r1OT3JxMhQKk5yb5FVJDkly7vPBEAAAgNFZF/fkvSHJZd3yZUmOG1b/dBtyU5Jtq+oVSV6XZF5r7dHW2mNJ5iWZsw76AgAA6L2xhryW5OtVdUtVnd7VdmytPdQt/1OSHbvlnZPcP+zYJV1tVfUXqKrTq2pBVS1YunTpGFsHAAD
on7FOvPKa1toDVfXyJPOq6u7hG1trraraGN9j+HiXJLkkSWbOnDlu4wIAAPTFmM7ktdYe6H7+JMlVGbqn7uHuMsx0P3/S7f5Akl2GHT6tq62qDgAAwCitdcirqi2qaqvnl5McneSOJFcneX6GzFOSfKlbvjrJyd0sm4cmWdZd1nldkqOrartuwpWjuxoAAACjNJbLNXdMclVVPT/OZ1trX6uqm5NcUVVvSfLjJL/d7X9tkl9PsjjJU0l+L0laa49W1Z8mubnb7/zW2qNj6AsmrF3PuWZcxrlvs3EZBgCAHlrrkNdauyfJASPUH0ly1Aj1luSMVYx1aZJL17YXAAAAhqyLRygAAAAwIEIeAABAjwh5AAAAPSLkAQAA9IiQBwAA0CNjeYQCnfGYFt+U+AAAwHhwJg8AAKBHhDwAAIAeEfIAAAB6RMgDAADoESEPAACgR4Q8AACAHhHyAAAAekTIAwAA6BEhDwAAoEeEPAAAgB4R8gAAAHpEyAMAAOgRIQ8AAKBHhDwAAIAeEfIAAAB6RMgDAADoESEPAACgR4Q8AACAHhHyAAAAekTIAwAA6BEhDwAAoEeEPAAAgB4R8gAAAHpEyAMAAOiRjQbdAAAj2/Wca8Y8xn2bjUMjAMCk4kweAABAjwh5AAAAPSLkAQAA9IiQBwAA0CMmXgEAWI+Y1An6T8gDgEluPH5pT/ziDtAXQh6sp/a7bL8xj3H7KbePQycAAIwnIa9HxuOX9sQv7gAAMJmZeAUAAKBHhDwAAIAeEfIAAAB6RMgDAADoEROvAGtt0Z57jcs4e929aFzGAQBAyANgNczcCwCTi8s1AQAAesSZPABg3IzHmV9nfdcPrhKAdUfIAwAAXmDXc64Zl3Hu22xchmEUXK4JAADQIxPmTF5VzUnyP5JsmOTjrbX3D7il9dZ4zJhotkQAABiMCRHyqmrDJB9O8tokS5LcXFVXt9buGmxnAADAoLnfd3QmRMhLckiSxa21e5Kkqi5P8oYkQh4ArGc8gxNYF9an/7ZUa23QPaSq3phkTmvtrd36m5O8qrX2zpX2Oz3J6d3qHkl+8JI2OvFNSfLPg26CScP3hTXlu8Jo+L6wpnxXGA3fl5H9Umtt6srFiXImb4201i5Jcsmg+5ioqmpBa23moPtgcvB9YU35rjAavi+sKd8VRsP3ZXQmyuyaDyTZZdj6tK4GAADAKEyUkHdzkt2rareq2iTJCUmuHnBPAAAAk86EuFyztba8qt6Z5LoMPULh0tbanQNuazJyKSuj4fvCmvJdYTR8X1hTviuMhu/LKEyIiVcAAAAYHxPlck0AAADGgZAHAADQI0IeAABAjwh5AAAAPTIhZtdkbKpqwyQ7Ztj/nq21fxxcR8BkV1WzkixsrT1ZVSclmZHkf7TWfjzg1oBJrqp+I8lh3erft9a+PMh+oI+cyZvkquoPkjycZF6Sa7rXVwbaFBNWVU2rqquqamlV/aSqvlhV0wbdFxPSxUmeqqoDkvznJD9K8unBtsREU1VPVNXj3euJYetPVNXjg+6Piaeq/jzJHya5q3udWVXvG2xXTFR+b1l7HqEwyVXV4iSvaq09MuhemPiqal6Szyb5X13ppCQnttZeO7iumIiq6tbW2oyq+u9JHmitfeL52qB7AyavqrotyYGttee69Q2T/ENrbf/BdsZE5PeWtedM3uR3f5Jlg26CSWNqa+2TrbXl3etTSaYOuikmpCeq6t0Z+gv1mqraIMnGA+6JCayqXlNVv9ctT6mq3QbdExPWtsOWtxlYF0wGfm9ZS+7Jm/zuSfLtqromyc+fL7bWPjS4lpjAHunur/pct/67SZwFZiS/k+Q/JnlLa+2fqurfJ/nAgHtigqqqc5PMTLJHkk8m2STJ3ySZNci+mJD+PMk/VNX8JJWhe/POGWxLTGB+b1lLLtec5Lq/WF+gtfbel7oXJr6q+qUkf5Xk1Ulakv8vyZkm6gHGoqoWJpme5NbW2vSudptL8BhJVb0iycH
d6vdaa/80yH6YuPzesvaEPABWqKobW2uvqaonMvQX6opNSVprbesBtcYEVlXfa60dMuxezi2SfEfIYyRVtXOSX8ovzgp+/eA6gv5xueYkVVUXtdbeVVVfzi/+IpYkaa39xgDaYoKrqrkjlJclWdBa+9JL3Q8TT2vtNd3PrQbdC5PKFVX1P5NsW1VvS3Jako8NuCcmoKr6iwxdDn5nkue6cksi5LFCVf1VRvj99nmttTNfwnYmJSFv8np+lqELh9We/z9DvcS9MHlslmTPJF/o1n8ryb1JDqiqI1pr7xpYZ0wY3Wx3d7bW9hx0L0wOrbULq+q1SR7P0H15/721Nm/AbTExHZdkj9baz1e7J+uzBYNuYLIT8iavaVV1aGvtw8nQpTIZmm2oJTl7oJ0xke2fZFZr7dkkqaqLk9yQ5DVJbh9kY0wcrbVnq+oHVfXv3ffAmqiqP07yecGONXBPhmbqFfJYpdbaZYPuYbIT8iav/5LkhGHrm2RoZrMtMjSz2RdGOoj13nZJtsy/PXZjiyTbd7/U+wuX4bZLcmf3D0hPPl90KTirsFWSr1fVo0k+n+QLrbWHB9wTE8iwy++eSrKwqr6ZX5wV3OV3vEBVTc3QyYu9M3Q1UpKktXbkwJqaJIS8yWuT1tr9w9Zv7B6I/kh3wzuM5P/N0F+u386/TV39vu47841BNsaE8/8MugEmj25G5/dW1f4Zut/q76tqSWvt/xpwa0wcz19+d0uSqwfZCJPKZzL0D0fHJvn9JKckWTrQjiYJs2tOUlW1uLX2K6vY9qPW2i+/1D0xOXRTVx/Srd7cWntwkP0A/VFV/y7JmzJ0pclWZtdkZd0/Kj497LaBDZNs2lp7arCdMRFV1S2ttYOGP5Klqm5urR28umPXdxsMugHW2ne7Gcx+QVX9pyTfG0A/TAJVVUmOSnJAN5vmRlV1yGoOYz1SVU9U1eMjvJ6oqscH3R8TU1W9o7tC4JtJdkjyNgGPVfhmks2HrW8eV5Kwas90Px+qqmOranqS7QfZ0GThTN4kVVUvT/J3Gbqe/daufFCSTZMc514IRtJNtPJckiNba3tV1XZJvu5fxICxqKo/z9DEKwsH3QsTW1UtbK0duLoaJElVvT5DE8TtkqGHom+d5L2tNZf8roYzeZNUa+0nrbVfTfKnSe7rXue31l4t4PEiXtVaOyPJ00nSWnssQ5P2AKy11tq7k2xZVb+XDE2WUFW7DbgtJqYnq2rG8ytVdVCSfxlgP0xA3fMUk2Tz1tqy1todrbUjWmsHCXhrxpk8WI9U1XeT/GqG7sWb0c1a9fXW2vQBtwZMYlV1boZmeN6jtfbKqtopQzNszhpwa0wwVXVwksuTPJihCcD+XZITWmuei8YKVXV7hh77dEtrbcbq9ueFzK4J65e5Sa5KsmNVXZDkjUneM9iWgB74zSTT090+0Fp7sKq2GmxLTESttZuras8ke3SlH7TWnnmxY1gvfS3JYxm6QmD4/eCVpLXWth5MW5OHyzVhPdJa+0yGnrH4viQPZej+Tc9UBMbqX9vQpUEtWTGDIqxQVf9l2Opx3eV3d7TWnqmq9w2sMSaq97TWtk1yTWtt62GvrQS8NSPkwfpnSpKnWmt/neSf3TcDjIMrqup/Jtm2m/n5G0k+PuCemFhOGLb87pW2zXkpG2FS+E7306zOa8nlmrAeGX7fTJJPJtk4yd8kcd8MsNZaaxdW1Wsz9AvZHkn+e2tt3oDbYmKpVSyPtA6bVNV/TPKrVXX8yhtba387gJ4mFSEP1i/umwHWiS7UzUuSqtqgqk7sLhGHpLuUd4Tlkdbh95OcmGTbJP/3SttaEiFvNYQ8WL/8a2utVZX7ZoAxq6qtk5yRZOckV2co5J2R5E+SfD+JkMfzDugm0Kgkmw+bTKOSbDa4tpiIWms3Jrmxqha01j4x6H4mIyEP1i8r3zdzWpKPDbgnYPL6XxmaAe87Sd6a5L9m6Jf24zwYneFaaxsOugcmpWeq6uSVi62
1Tw+imcnEc/JgPdPdN3N0hn4Ru859M8DaqqrbW2v7dcsbZmjW3n/fWnt6sJ0BfVBVfzVsdbMkRyW5tbX2xgG1NGkIebCeqqopSR5p/iMArKWqunX4g4pXXgcYT1W1bZLLW2tmZF0Nj1CA9UBVHVpV366qv62q6VV1R5I7kjxcVf5DCaytA6rq8e71RJL9n19e6QHGAOPhySQe/bQG3JMH64e/ztC9Mtsk+VaSY1prN1XVnkk+l+Rrg2wOmJzcZwWsS1X15fzb7KsbJNk7yRWD62jycLkmrAeqamFr7cBueVFrba9h2/6htTZ9cN0BALxQVf3asNXlSX7cWlsyqH4mE2fyYP3w3LDlf1lpm3/pAQAmnNba3z+//PxcAgNsZ1JxJg/WA1X1bIauY68kmyd56vlNSTZrrW08qN4AAIarqkOTvD/Jo0n+NEOPa5mSoUs2T26tuc1kNYQ8AABgwqiqBfm3uQQuyUpzCbjNZPXMrgkAAEwkG7XWvt5a+0KSf2qt3ZQkrbW7B9zXpCHkAQAAE4m5BMbI5ZoAAMCEYS6BsRPyAAAAesTlmgAAAD0i5AEAAPSIkAcAANAjQh4AAECP/P/HtLLr5hUEvAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df.plot(kind='bar', figsize=(15, 8))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/jupyter/Pandas完全指南.ipynb b/docs/jupyter/Pandas完全指南.ipynb new file mode 100644 index 0000000..12eb55b --- /dev/null +++ b/docs/jupyter/Pandas完全指南.ipynb @@ -0,0 +1,8346 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 前言\n", + "\n", + "\n", + "Pandas 是一个Python语言实现的,开源,易于使用的数据架构以及数据分析工具。在Pandas中主要有两种数据类型,可以简单的理解为:\n", + "\n", + "- Series:一维数组(列表)\n", + "- DateFrame:二维数组(矩阵)\n", + "\n", + "在线实验:[Pandas完全指南.ipynb](https://nbviewer.jupyter.org/github/cyub/code-examples/blob/master/jupyter-notes/Pandas%E5%AE%8C%E5%85%A8%E6%8C%87%E5%8D%97.ipynb#)\n", + "\n", + "学习资料:\n", + "\n", + "- [Pandas中文文档](https://www.pypandas.cn/docs/)\n", + "- [利用Python进行数据分析·第2版](https://github.com/iamseancheney/python_for_data_analysis_2nd_chinese_version)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 导入pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# 安装pandas,matplotlib(绘图用) 包\n", + "!pip install pandas matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# 导入包\n", + "import pandas as pd\n", + "import numpy as np\n", + "from IPython.display import Image" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 创建列表" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "### 创建普通列表" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "s = pd.Series([1, 3, 6, np.nan, 23, 3]) # type(s) === 'pandas.core.series.Series'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 创建时间列表" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "dates = pd.date_range('20200101', periods=6)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 创建矩阵" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 根据列表(Series)创建矩阵" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['a', 'b', 'c', 'd'])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
2020-01-012.078888-0.959554-0.3672651.108948
2020-01-02-0.4123620.232540-1.903134-1.831848
2020-01-03-1.8987210.9199760.4856300.758721
2020-01-040.4869610.3783230.1867270.671816
2020-01-050.702523-0.5567980.635000-0.118564
2020-01-060.654506-0.0007270.417828-0.611751
\n", + "
" + ], + "text/plain": [ + " a b c d\n", + "2020-01-01 2.078888 -0.959554 -0.367265 1.108948\n", + "2020-01-02 -0.412362 0.232540 -1.903134 -1.831848\n", + "2020-01-03 -1.898721 0.919976 0.485630 0.758721\n", + "2020-01-04 0.486961 0.378323 0.186727 0.671816\n", + "2020-01-05 0.702523 -0.556798 0.635000 -0.118564\n", + "2020-01-06 0.654506 -0.000727 0.417828 -0.611751" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "df2 = pd.DataFrame({\n", + " 'a':pd.Series([1, 2, 3, 4]),\n", + " 'b':pd.Timestamp('20180708'),\n", + " 'c':pd.Categorical(['cate1', 'cate2', 'cate3', 'cate4'])\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
012018-07-08cate1
122018-07-08cate2
232018-07-08cate3
342018-07-08cate4
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 1 2018-07-08 cate1\n", + "1 2 2018-07-08 cate2\n", + "2 3 2018-07-08 cate3\n", + "3 4 2018-07-08 cate4" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 根据字典创建矩阵" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "data = {'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy', 'Jack', 'Tim'], \n", + " 'age': [20, 32, 36, 24, 23, 18, 27], \n", + " 'gender': np.random.choice(['M','F'],size=7),\n", + " 'score': [25, 94, 57, 62, 70, 88, 67],\n", + " 'country': np.random.choice(['US','CN'],size=7),\n", + " }\n", + "df3 = pd.DataFrame(data, columns = ['name', 'age', 'gender', 'score', 'country'])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameagegenderscorecountry
0Jason20F25US
1Molly32F94CN
2Tina36F57CN
3Jake24F62US
4Amy23F70CN
5Jack18F88US
6Tim27F67CN
\n", + "
" + ], + "text/plain": [ + " name age gender score country\n", + "0 Jason 20 F 25 US\n", + "1 Molly 32 F 94 CN\n", + "2 Tina 36 F 57 CN\n", + "3 Jake 24 F 62 US\n", + "4 Amy 23 F 70 CN\n", + "5 Jack 18 F 88 US\n", + "6 Tim 27 F 67 CN" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 矩阵属性、检视数据" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 行数列数" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(6, 4)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 索引" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',\n", + " '2020-01-05', '2020-01-06'],\n", + " dtype='datetime64[ns]', freq='D')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 列名" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['a', 'b', 'c', 'd'], dtype='object')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 值" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 2.07888761e+00, -9.59553787e-01, -3.67264810e-01,\n", + " 1.10894771e+00],\n", + " [-4.12361501e-01, 2.32539690e-01, 
-1.90313388e+00,\n", + " -1.83184759e+00],\n", + " [-1.89872061e+00, 9.19975617e-01, 4.85630402e-01,\n", + " 7.58720982e-01],\n", + " [ 4.86960560e-01, 3.78322949e-01, 1.86726767e-01,\n", + " 6.71815555e-01],\n", + " [ 7.02523492e-01, -5.56797752e-01, 6.35000384e-01,\n", + " -1.18564302e-01],\n", + " [ 6.54506255e-01, -7.26685067e-04, 4.17828341e-01,\n", + " -6.11751157e-01]])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 矩阵信息" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "DatetimeIndex: 6 entries, 2020-01-01 to 2020-01-06\n", + "Freq: D\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 a 6 non-null float64\n", + " 1 b 6 non-null float64\n", + " 2 c 6 non-null float64\n", + " 3 d 6 non-null float64\n", + "dtypes: float64(4)\n", + "memory usage: 240.0 bytes\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 矩阵描述信息" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
count6.0000006.0000006.0000006.000000
mean0.2686330.002293-0.090869-0.003780
std1.3283840.6744320.9545441.095503
min-1.898721-0.959554-1.903134-1.831848
25%-0.187531-0.417780-0.228767-0.488454
50%0.5707330.1159070.3022780.276626
75%0.6905190.3418770.4686800.736995
max2.0788880.9199760.6350001.108948
\n", + "
" + ], + "text/plain": [ + " a b c d\n", + "count 6.000000 6.000000 6.000000 6.000000\n", + "mean 0.268633 0.002293 -0.090869 -0.003780\n", + "std 1.328384 0.674432 0.954544 1.095503\n", + "min -1.898721 -0.959554 -1.903134 -1.831848\n", + "25% -0.187531 -0.417780 -0.228767 -0.488454\n", + "50% 0.570733 0.115907 0.302278 0.276626\n", + "75% 0.690519 0.341877 0.468680 0.736995\n", + "max 2.078888 0.919976 0.635000 1.108948" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
2020-06-012.078888-0.959554-0.3672651.108948
2020-06-02-0.4123620.232540-1.903134-1.831848
2020-06-03-1.8987210.9199760.4856300.758721
2020-06-040.4869610.3783230.1867270.671816
2020-06-050.702523-0.5567980.635000-0.118564
2020-06-060.654506-0.0007270.417828-0.611751
\n", + "
" + ], + "text/plain": [ + " a b c d\n", + "2020-06-01 2.078888 -0.959554 -0.367265 1.108948\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564\n", + "2020-06-06 0.654506 -0.000727 0.417828 -0.611751" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "### 更改索引\n", + "df.index = pd.date_range('2020/06/01', periods=df.shape[0])\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### top5 数据" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
2020-06-012.078888-0.959554-0.3672651.108948
\n", + "
" + ], + "text/plain": [ + " a b c d\n", + "2020-06-01 2.078888 -0.959554 -0.367265 1.108948" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### tail5 数据" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
2020-06-02-0.4123620.232540-1.903134-1.831848
2020-06-03-1.8987210.9199760.4856300.758721
2020-06-040.4869610.3783230.1867270.671816
2020-06-050.702523-0.5567980.635000-0.118564
2020-06-060.654506-0.0007270.417828-0.611751
\n", + "
" + ], + "text/plain": [ + " a b c d\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564\n", + "2020-06-06 0.654506 -0.000727 0.417828 -0.611751" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.tail(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 某一列值统计" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1.898721 1\n", + " 0.702523 1\n", + " 2.078888 1\n", + " 0.486961 1\n", + " 0.654506 1\n", + "-0.412362 1\n", + "Name: a, dtype: int64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['a'].value_counts(dropna=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 查看每一列唯一值统计" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
-1.903134NaNNaN1.0NaN
-1.8987211.0NaNNaNNaN
-1.831848NaNNaNNaN1.0
-0.959554NaN1.0NaNNaN
-0.611751NaNNaNNaN1.0
-0.556798NaN1.0NaNNaN
-0.4123621.0NaNNaNNaN
-0.367265NaNNaN1.0NaN
-0.118564NaNNaNNaN1.0
-0.000727NaN1.0NaNNaN
0.186727NaNNaN1.0NaN
0.232540NaN1.0NaNNaN
0.378323NaN1.0NaNNaN
0.417828NaNNaN1.0NaN
0.485630NaNNaN1.0NaN
0.4869611.0NaNNaNNaN
0.635000NaNNaN1.0NaN
0.6545061.0NaNNaNNaN
0.671816NaNNaNNaN1.0
0.7025231.0NaNNaNNaN
0.758721NaNNaNNaN1.0
0.919976NaN1.0NaNNaN
1.108948NaNNaNNaN1.0
2.0788881.0NaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " a b c d\n", + "-1.903134 NaN NaN 1.0 NaN\n", + "-1.898721 1.0 NaN NaN NaN\n", + "-1.831848 NaN NaN NaN 1.0\n", + "-0.959554 NaN 1.0 NaN NaN\n", + "-0.611751 NaN NaN NaN 1.0\n", + "-0.556798 NaN 1.0 NaN NaN\n", + "-0.412362 1.0 NaN NaN NaN\n", + "-0.367265 NaN NaN 1.0 NaN\n", + "-0.118564 NaN NaN NaN 1.0\n", + "-0.000727 NaN 1.0 NaN NaN\n", + " 0.186727 NaN NaN 1.0 NaN\n", + " 0.232540 NaN 1.0 NaN NaN\n", + " 0.378323 NaN 1.0 NaN NaN\n", + " 0.417828 NaN NaN 1.0 NaN\n", + " 0.485630 NaN NaN 1.0 NaN\n", + " 0.486961 1.0 NaN NaN NaN\n", + " 0.635000 NaN NaN 1.0 NaN\n", + " 0.654506 1.0 NaN NaN NaN\n", + " 0.671816 NaN NaN NaN 1.0\n", + " 0.702523 1.0 NaN NaN NaN\n", + " 0.758721 NaN NaN NaN 1.0\n", + " 0.919976 NaN 1.0 NaN NaN\n", + " 1.108948 NaN NaN NaN 1.0\n", + " 2.078888 1.0 NaN NaN NaN" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.apply(pd.Series.value_counts)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 排序" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 根据索引(index)排序" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
2020-06-060.654506-0.0007270.417828-0.611751
2020-06-050.702523-0.5567980.635000-0.118564
2020-06-040.4869610.3783230.1867270.671816
2020-06-03-1.8987210.9199760.4856300.758721
2020-06-02-0.4123620.232540-1.903134-1.831848
2020-06-012.078888-0.959554-0.3672651.108948
\n", + "
" + ], + "text/plain": [ + " a b c d\n", + "2020-06-06 0.654506 -0.000727 0.417828 -0.611751\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848\n", + "2020-06-01 2.078888 -0.959554 -0.367265 1.108948" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# sort_index(axis=, ascending=)\n", + "# axis:0-行排序,1-列排序; ascending:True-升序,False-降序\n", + "df.sort_index(axis=0, ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dcba
2020-06-011.108948-0.367265-0.9595542.078888
2020-06-02-1.831848-1.9031340.232540-0.412362
2020-06-030.7587210.4856300.919976-1.898721
2020-06-040.6718160.1867270.3783230.486961
2020-06-05-0.1185640.635000-0.5567980.702523
2020-06-06-0.6117510.417828-0.0007270.654506
\n", + "
" + ], + "text/plain": [ + " d c b a\n", + "2020-06-01 1.108948 -0.367265 -0.959554 2.078888\n", + "2020-06-02 -1.831848 -1.903134 0.232540 -0.412362\n", + "2020-06-03 0.758721 0.485630 0.919976 -1.898721\n", + "2020-06-04 0.671816 0.186727 0.378323 0.486961\n", + "2020-06-05 -0.118564 0.635000 -0.556798 0.702523\n", + "2020-06-06 -0.611751 0.417828 -0.000727 0.654506" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_index(axis=1, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 根据值排序" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
2020-06-012.078888-0.959554-0.3672651.108948
2020-06-050.702523-0.5567980.635000-0.118564
2020-06-060.654506-0.0007270.417828-0.611751
2020-06-040.4869610.3783230.1867270.671816
2020-06-02-0.4123620.232540-1.903134-1.831848
2020-06-03-1.8987210.9199760.4856300.758721
\n", + "
" + ], + "text/plain": [ + " a b c d\n", + "2020-06-01 2.078888 -0.959554 -0.367265 1.108948\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564\n", + "2020-06-06 0.654506 -0.000727 0.417828 -0.611751\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_values(by='a', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
2020-06-03-1.8987210.9199760.4856300.758721
2020-06-02-0.4123620.232540-1.903134-1.831848
2020-06-040.4869610.3783230.1867270.671816
2020-06-060.654506-0.0007270.417828-0.611751
2020-06-050.702523-0.5567980.635000-0.118564
2020-06-012.078888-0.959554-0.3672651.108948
\n", + "
" + ], + "text/plain": [ + " a b c d\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816\n", + "2020-06-06 0.654506 -0.000727 0.417828 -0.611751\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564\n", + "2020-06-01 2.078888 -0.959554 -0.367265 1.108948" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_values(by=['a','b'], ascending=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 选取数据" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 选取某一列" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2020-06-01 2.078888\n", + "2020-06-02 -0.412362\n", + "2020-06-03 -1.898721\n", + "2020-06-04 0.486961\n", + "2020-06-05 0.702523\n", + "2020-06-06 0.654506\n", + "Freq: D, Name: a, dtype: float64" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['a'] # 等效于df.a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 根据索引选取某几行数据" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
2020-06-012.078888-0.959554-0.3672651.108948
2020-06-02-0.4123620.232540-1.903134-1.831848
\n", + "
" + ], + "text/plain": [ + " a b c d\n", + "2020-06-01 2.078888 -0.959554 -0.367265 1.108948\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['2020-06-01':'2020-06-02'] # 选取索引以2020-06-01开始,到2020-06-02结束的数据" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 根据列名选择某几列数据" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cb
2020-06-01-0.367265-0.959554
2020-06-02-1.9031340.232540
2020-06-030.4856300.919976
2020-06-040.1867270.378323
2020-06-050.635000-0.556798
2020-06-060.417828-0.000727
\n", + "
" + ], + "text/plain": [ + " c b\n", + "2020-06-01 -0.367265 -0.959554\n", + "2020-06-02 -1.903134 0.232540\n", + "2020-06-03 0.485630 0.919976\n", + "2020-06-04 0.186727 0.378323\n", + "2020-06-05 0.635000 -0.556798\n", + "2020-06-06 0.417828 -0.000727" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['c', 'b']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 根据索引和列名选择数据\n", + "\n", + "loc[行名选择, 列名选择],未指定行名或列名,或者指定为:则表示选择当前所有行,或列" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 2.078888\n", + "b -0.959554\n", + "c -0.367265\n", + "d 1.108948\n", + "Name: 2020-06-01 00:00:00, dtype: float64" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc['2020-06-01']" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-0.9595537865841992" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc['2020-06-01', 'b']" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2020-06-01 -0.959554\n", + "2020-06-02 0.232540\n", + "2020-06-03 0.919976\n", + "2020-06-04 0.378323\n", + "2020-06-05 -0.556798\n", + "2020-06-06 -0.000727\n", + "Freq: D, Name: b, dtype: float64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[:, 'b'] # type(df.loc[:, 'b']) === 'pandas.core.series.Series',而type(df.loc[:, ['b']]) === ’pandas.core.frame.DataFrame‘" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
2020-06-012.078888-0.959554
2020-06-02-0.4123620.232540
2020-06-03-1.8987210.919976
2020-06-040.4869610.378323
2020-06-050.702523-0.556798
2020-06-060.654506-0.000727
\n", + "
" + ], + "text/plain": [ + " a b\n", + "2020-06-01 2.078888 -0.959554\n", + "2020-06-02 -0.412362 0.232540\n", + "2020-06-03 -1.898721 0.919976\n", + "2020-06-04 0.486961 0.378323\n", + "2020-06-05 0.702523 -0.556798\n", + "2020-06-06 0.654506 -0.000727" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[:, ['a', 'b']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 根据行索引和列索引取数据" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2.0788876064798893" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0,0] # === df.loc['2020-06-01', 'a']" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 2.078888\n", + "b -0.959554\n", + "c -0.367265\n", + "d 1.108948\n", + "Name: 2020-06-01 00:00:00, dtype: float64" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0, :] # ==== df.loc['2020-06-01', :]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 根据布尔表达式取数据\n", + "\n", + "只有当布尔表达式为真时的数据才会被选择" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
2020-06-012.078888-0.959554-0.3672651.108948
\n", + "
" + ], + "text/plain": [ + " a b c d\n", + "2020-06-01 2.078888 -0.959554 -0.367265 1.108948" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.a > 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [a, b, c, d]\n", + "Index: []" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[(df['a'] > 1) & (df['d'] <0)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 添加/删除列、更新、替换数据" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 设置某矩阵项值" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "df.loc['2020-06-01', 'a'] = np.nan\n", + "df.loc['2020-06-06', 'c'] = np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
2020-06-01NaN-0.959554-0.3672651.108948
2020-06-02-0.4123620.232540-1.903134-1.831848
2020-06-03-1.8987210.9199760.4856300.758721
2020-06-040.4869610.3783230.1867270.671816
2020-06-050.702523-0.5567980.635000-0.118564
2020-06-060.654506-0.000727NaN-0.611751
\n", + "
" + ], + "text/plain": [ + " a b c d\n", + "2020-06-01 NaN -0.959554 -0.367265 1.108948\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564\n", + "2020-06-06 0.654506 -0.000727 NaN -0.611751" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 根据条件创建新列" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "df['e'] = np.where((df['a'] > 1) & (df['d']<0), 1, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcde
2020-06-01NaN-0.959554-0.3672651.1089480
2020-06-02-0.4123620.232540-1.903134-1.8318480
2020-06-03-1.8987210.9199760.4856300.7587210
2020-06-040.4869610.3783230.1867270.6718160
2020-06-050.702523-0.5567980.635000-0.1185640
2020-06-060.654506-0.000727NaN-0.6117510
\n", + "
" + ], + "text/plain": [ + " a b c d e\n", + "2020-06-01 NaN -0.959554 -0.367265 1.108948 0\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848 0\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721 0\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816 0\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564 0\n", + "2020-06-06 0.654506 -0.000727 NaN -0.611751 0" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 根据已有列创建新列" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "tmp = df.copy()\n", + "df.loc[:,'f'] = tmp.apply(lambda row: row['b']+ row['d'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcdef
2020-06-01NaN-0.959554-0.3672651.10894800.149394
2020-06-02-0.4123620.232540-1.903134-1.8318480-1.599308
2020-06-03-1.8987210.9199760.4856300.75872101.678697
2020-06-040.4869610.3783230.1867270.67181601.050139
2020-06-050.702523-0.5567980.635000-0.1185640-0.675362
2020-06-060.654506-0.000727NaN-0.6117510-0.612478
\n", + "
" + ], + "text/plain": [ + " a b c d e f\n", + "2020-06-01 NaN -0.959554 -0.367265 1.108948 0 0.149394\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848 0 -1.599308\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721 0 1.678697\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816 0 1.050139\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564 0 -0.675362\n", + "2020-06-06 0.654506 -0.000727 NaN -0.611751 0 -0.612478" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 替换数据" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcdef
2020-06-01NaN-0.959554-0.3672651.10894800.149394
2020-06-02-0.4123620.232540-1.903134-1.8318480-1.599308
2020-06-03-1.8987210.9199760.4856300.75872101.678697
2020-06-040.4869610.3783230.1867270.67181601.050139
2020-06-050.702523-0.5567980.635000-0.1185640-0.675362
2020-06-060.654506-0.000727NaN-0.6117510-0.612478
\n", + "
" + ], + "text/plain": [ + " a b c d e f\n", + "2020-06-01 NaN -0.959554 -0.367265 1.108948 0 0.149394\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848 0 -1.599308\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721 0 1.678697\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816 0 1.050139\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564 0 -0.675362\n", + "2020-06-06 0.654506 -0.000727 NaN -0.611751 0 -0.612478" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 将所有等于1的值替换成20\n", + "df.replace(1,20)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcdef
2020-06-01NaN-0.959554-0.3672651.10894800.149394
2020-06-02-0.4123620.232540-1.903134-1.8318480-1.599308
2020-06-03-1.8987210.9199760.4856300.75872101.678697
2020-06-040.4869610.3783230.1867270.67181601.050139
2020-06-050.702523-0.5567980.635000-0.1185640-0.675362
2020-06-060.654506-0.000727NaN-0.6117510-0.612478
\n", + "
" + ], + "text/plain": [ + " a b c d e f\n", + "2020-06-01 NaN -0.959554 -0.367265 1.108948 0 0.149394\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848 0 -1.599308\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721 0 1.678697\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816 0 1.050139\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564 0 -0.675362\n", + "2020-06-06 0.654506 -0.000727 NaN -0.611751 0 -0.612478" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 使用one替换1,three替换3\n", + "df.replace([1,3],['one','three'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 列名重命名" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abccdef
2020-06-01NaN-0.959554-0.3672651.10894800.149394
2020-06-02-0.4123620.232540-1.903134-1.8318480-1.599308
2020-06-03-1.8987210.9199760.4856300.75872101.678697
2020-06-040.4869610.3783230.1867270.67181601.050139
2020-06-050.702523-0.5567980.635000-0.1185640-0.675362
2020-06-060.654506-0.000727NaN-0.6117510-0.612478
\n", + "
" + ], + "text/plain": [ + " a b cc d e f\n", + "2020-06-01 NaN -0.959554 -0.367265 1.108948 0 0.149394\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848 0 -1.599308\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721 0 1.678697\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816 0 1.050139\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564 0 -0.675362\n", + "2020-06-06 0.654506 -0.000727 NaN -0.611751 0 -0.612478" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.rename(columns={'c':'cc'})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 重设索引" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bcdef
a
NaN-0.959554-0.3672651.10894800.149394
-0.4123620.232540-1.903134-1.8318480-1.599308
-1.8987210.9199760.4856300.75872101.678697
0.4869610.3783230.1867270.67181601.050139
0.702523-0.5567980.635000-0.1185640-0.675362
0.654506-0.000727NaN-0.6117510-0.612478
\n", + "
" + ], + "text/plain": [ + " b c d e f\n", + "a \n", + " NaN -0.959554 -0.367265 1.108948 0 0.149394\n", + "-0.412362 0.232540 -1.903134 -1.831848 0 -1.599308\n", + "-1.898721 0.919976 0.485630 0.758721 0 1.678697\n", + " 0.486961 0.378323 0.186727 0.671816 0 1.050139\n", + " 0.702523 -0.556798 0.635000 -0.118564 0 -0.675362\n", + " 0.654506 -0.000727 NaN -0.611751 0 -0.612478" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 将a设置为索引\n", + "df.set_index('a')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 删除列" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bcde
2020-06-01-0.959554-0.3672651.1089480
2020-06-020.232540-1.903134-1.8318480
2020-06-030.9199760.4856300.7587210
2020-06-040.3783230.1867270.6718160
2020-06-05-0.5567980.635000-0.1185640
2020-06-06-0.000727NaN-0.6117510
\n", + "
" + ], + "text/plain": [ + " b c d e\n", + "2020-06-01 -0.959554 -0.367265 1.108948 0\n", + "2020-06-02 0.232540 -1.903134 -1.831848 0\n", + "2020-06-03 0.919976 0.485630 0.758721 0\n", + "2020-06-04 0.378323 0.186727 0.671816 0\n", + "2020-06-05 -0.556798 0.635000 -0.118564 0\n", + "2020-06-06 -0.000727 NaN -0.611751 0" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.drop(columns=['a', 'f'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 处理Nan数据" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 检查是否Nan值" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcdef
2020-06-01TrueFalseFalseFalseFalseFalse
2020-06-02FalseFalseFalseFalseFalseFalse
2020-06-03FalseFalseFalseFalseFalseFalse
2020-06-04FalseFalseFalseFalseFalseFalse
2020-06-05FalseFalseFalseFalseFalseFalse
2020-06-06FalseFalseTrueFalseFalseFalse
\n", + "
" + ], + "text/plain": [ + " a b c d e f\n", + "2020-06-01 True False False False False False\n", + "2020-06-02 False False False False False False\n", + "2020-06-03 False False False False False False\n", + "2020-06-04 False False False False False False\n", + "2020-06-05 False False False False False False\n", + "2020-06-06 False False True False False False" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcdef
2020-06-01FalseTrueTrueTrueTrueTrue
2020-06-02TrueTrueTrueTrueTrueTrue
2020-06-03TrueTrueTrueTrueTrueTrue
2020-06-04TrueTrueTrueTrueTrueTrue
2020-06-05TrueTrueTrueTrueTrueTrue
2020-06-06TrueTrueFalseTrueTrueTrue
\n", + "
" + ], + "text/plain": [ + " a b c d e f\n", + "2020-06-01 False True True True True True\n", + "2020-06-02 True True True True True True\n", + "2020-06-03 True True True True True True\n", + "2020-06-04 True True True True True True\n", + "2020-06-05 True True True True True True\n", + "2020-06-06 True True False True True True" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.notnull() # df.isnull()反操作" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 删除掉包含null值的行" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcdef
2020-06-02-0.4123620.232540-1.903134-1.8318480-1.599308
2020-06-03-1.8987210.9199760.4856300.75872101.678697
2020-06-040.4869610.3783230.1867270.67181601.050139
2020-06-050.702523-0.5567980.635000-0.1185640-0.675362
\n", + "
" + ], + "text/plain": [ + " a b c d e f\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848 0 -1.599308\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721 0 1.678697\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816 0 1.050139\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564 0 -0.675362" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "### dropna(axis=, how=):丢弃NaN数据,\n", + "# axis:0-按行丢弃,1-按列丢弃; how:'any'-只要含有NaN数据就丢弃,'all'-所有数据都为NaN时丢弃\n", + "\n", + "df.dropna(axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 替换Nan" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcdef
2020-06-011000.000000-0.959554-0.3672651.10894800.149394
2020-06-02-0.4123620.232540-1.903134-1.8318480-1.599308
2020-06-03-1.8987210.9199760.4856300.75872101.678697
2020-06-040.4869610.3783230.1867270.67181601.050139
2020-06-050.702523-0.5567980.635000-0.1185640-0.675362
2020-06-060.654506-0.0007271000.000000-0.6117510-0.612478
\n", + "
" + ], + "text/plain": [ + " a b c d e f\n", + "2020-06-01 1000.000000 -0.959554 -0.367265 1.108948 0 0.149394\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848 0 -1.599308\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721 0 1.678697\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816 0 1.050139\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564 0 -0.675362\n", + "2020-06-06 0.654506 -0.000727 1000.000000 -0.611751 0 -0.612478" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#### 使用1000替换Nan\n", + "df.fillna(1000)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcdef
2020-06-01-0.093418-0.959554-0.3672651.10894800.149394
2020-06-02-0.4123620.232540-1.903134-1.8318480-1.599308
2020-06-03-1.8987210.9199760.4856300.75872101.678697
2020-06-040.4869610.3783230.1867270.67181601.050139
2020-06-050.702523-0.5567980.635000-0.1185640-0.675362
2020-06-060.654506-0.000727-0.192608-0.6117510-0.612478
\n", + "
" + ], + "text/plain": [ + " a b c d e f\n", + "2020-06-01 -0.093418 -0.959554 -0.367265 1.108948 0 0.149394\n", + "2020-06-02 -0.412362 0.232540 -1.903134 -1.831848 0 -1.599308\n", + "2020-06-03 -1.898721 0.919976 0.485630 0.758721 0 1.678697\n", + "2020-06-04 0.486961 0.378323 0.186727 0.671816 0 1.050139\n", + "2020-06-05 0.702523 -0.556798 0.635000 -0.118564 0 -0.675362\n", + "2020-06-06 0.654506 -0.000727 -0.192608 -0.611751 0 -0.612478" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 使用平均值替换所有null值\n", + "df.fillna(df.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 聚合、分组、统计" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 返回每一列的平均数" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a -0.093418\n", + "b 0.002293\n", + "c -0.192608\n", + "d -0.003780\n", + "e 0.000000\n", + "f -0.001486\n", + "dtype: float64" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 返回列之间的相关性" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcdef
a1.000000-0.8210880.055410-0.201634NaN-0.486299
b-0.8210881.0000000.024617-0.127603NaN0.441503
c0.0554100.0246171.0000000.743462NaN0.682118
d-0.201634-0.1276030.7434621.000000NaN0.833588
eNaNNaNNaNNaNNaNNaN
f-0.4862990.4415030.6821180.833588NaN1.000000
\n", + "
" + ], + "text/plain": [ + " a b c d e f\n", + "a 1.000000 -0.821088 0.055410 -0.201634 NaN -0.486299\n", + "b -0.821088 1.000000 0.024617 -0.127603 NaN 0.441503\n", + "c 0.055410 0.024617 1.000000 0.743462 NaN 0.682118\n", + "d -0.201634 -0.127603 0.743462 1.000000 NaN 0.833588\n", + "e NaN NaN NaN NaN NaN NaN\n", + "f -0.486299 0.441503 0.682118 0.833588 NaN 1.000000" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.corr()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 返回每一列中非null值数量" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 5\n", + "b 6\n", + "c 5\n", + "d 6\n", + "e 6\n", + "f 6\n", + "dtype: int64" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 返回每一列中最大值" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 0.702523\n", + "b 0.919976\n", + "c 0.635000\n", + "d 1.108948\n", + "e 0.000000\n", + "f 1.678697\n", + "dtype: float64" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.max()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 返回每一列中最小值" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a -1.898721\n", + "b -0.959554\n", + "c -1.903134\n", + "d -1.831848\n", + "e 0.000000\n", + "f -1.599308\n", + "dtype: float64" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.min()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 返回每一列的中值" + ] + }, + { + "cell_type": "code", + 
"execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 0.486961\n", + "b 0.115907\n", + "c 0.186727\n", + "d 0.276626\n", + "e 0.000000\n", + "f -0.231542\n", + "dtype: float64" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.median()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 返回每一列的标准偏差" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 1.105735\n", + "b 0.674432\n", + "c 1.030199\n", + "d 1.095503\n", + "e 0.000000\n", + "f 1.210962\n", + "dtype: float64" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.std()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 分组后取TopN" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameagegenderscorecountry
1Molly32F94CN
4Amy23F70CN
6Tim27F67CN
2Tina36F57CN
5Jack18F88US
3Jake24F62US
0Jason20F25US
\n", + "
" + ], + "text/plain": [ + " name age gender score country\n", + "1 Molly 32 F 94 CN\n", + "4 Amy 23 F 70 CN\n", + "6 Tim 27 F 67 CN\n", + "2 Tina 36 F 57 CN\n", + "5 Jack 18 F 88 US\n", + "3 Jake 24 F 62 US\n", + "0 Jason 20 F 25 US" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "### 取每个国家下,分值前二的记录\n", + "\n", + "# 先排序\n", + "df4 = df3.sort_values(['country','score'],ascending=[1, 0],inplace=False)\n", + "df4" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameagegenderscorecountry
1Molly32F94CN
4Amy23F70CN
5Jack18F88US
3Jake24F62US
\n", + "
" + ], + "text/plain": [ + " name age gender score country\n", + "1 Molly 32 F 94 CN\n", + "4 Amy 23 F 70 CN\n", + "5 Jack 18 F 88 US\n", + "3 Jake 24 F 62 US" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 取值\n", + "df4.groupby(['country']).head(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 多重分组后取TopN" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameagegenderscorecountry
1Molly32F94CN
4Amy23F70CN
6Tim27F67CN
2Tina36F57CN
5Jack18F88US
3Jake24F62US
0Jason20F25US
\n", + "
" + ], + "text/plain": [ + " name age gender score country\n", + "1 Molly 32 F 94 CN\n", + "4 Amy 23 F 70 CN\n", + "6 Tim 27 F 67 CN\n", + "2 Tina 36 F 57 CN\n", + "5 Jack 18 F 88 US\n", + "3 Jake 24 F 62 US\n", + "0 Jason 20 F 25 US" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "### 取每个国家下,各性别分值最高的记录\n", + "\n", + "# 先排序\n", + "df5 = df3.sort_values(['country','gender', 'score'],ascending=[1, 0, 0],inplace=False)\n", + "df5" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameagegenderscorecountry
1Molly32F94CN
5Jack18F88US
\n", + "
" + ], + "text/plain": [ + " name age gender score country\n", + "1 Molly 32 F 94 CN\n", + "5 Jack 18 F 88 US" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df5 = df5.groupby(['country', 'gender']).head(1) # 注意此处取1\n", + "df5" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameagegenderscorecountry
1Molly32F94CN
5Jack18F88US
\n", + "
" + ], + "text/plain": [ + " name age gender score country\n", + "1 Molly 32 F 94 CN\n", + "5 Jack 18 F 88 US" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df5.groupby(['country']).head(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 分组之后取平均值" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
score
gender
F66.142857
\n", + "
" + ], + "text/plain": [ + " score\n", + "gender \n", + "F 66.142857" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scoreMean = df3.groupby(['gender'])['score'].mean()\n", + "scoreMean = pd.DataFrame(scoreMean) # 等效于scoreMean = scoreMean.to_frame()\n", + "scoreMean" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameagegenderscore_xcountryscore_y
0Jason20F25US66.142857
1Molly32F94CN66.142857
2Tina36F57CN66.142857
3Jake24F62US66.142857
4Amy23F70CN66.142857
5Jack18F88US66.142857
6Tim27F67CN66.142857
\n", + "
" + ], + "text/plain": [ + " name age gender score_x country score_y\n", + "0 Jason 20 F 25 US 66.142857\n", + "1 Molly 32 F 94 CN 66.142857\n", + "2 Tina 36 F 57 CN 66.142857\n", + "3 Jake 24 F 62 US 66.142857\n", + "4 Amy 23 F 70 CN 66.142857\n", + "5 Jack 18 F 88 US 66.142857\n", + "6 Tim 27 F 67 CN 66.142857" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#### 合并\n", + "df3.merge(scoreMean,left_on='gender',right_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameagegenderscorecountry
0Jason20F25US
1Molly32F94CN
2Tina36F57CN
3Jake24F62US
4Amy23F70CN
5Jack18F88US
6Tim27F67CN
\n", + "
" + ], + "text/plain": [ + " name age gender score country\n", + "0 Jason 20 F 25 US\n", + "1 Molly 32 F 94 CN\n", + "2 Tina 36 F 57 CN\n", + "3 Jake 24 F 62 US\n", + "4 Amy 23 F 70 CN\n", + "5 Jack 18 F 88 US\n", + "6 Tim 27 F 67 CN" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 分组之后计数" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gender
country
CN4
US3
\n", + "
" + ], + "text/plain": [ + " gender\n", + "country \n", + "CN 4\n", + "US 3" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3.groupby(['country'])['gender'].count().to_frame()" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gender
countrygender
CNF4
USF3
\n", + "
" + ], + "text/plain": [ + " gender\n", + "country gender \n", + "CN F 4\n", + "US F 3" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "### 按性别统计每个国家的人数\n", + "\n", + "df3.groupby(['country', 'gender'])['gender'].count().to_frame()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 分组后唯一值统计" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gender
country
CN1
US1
\n", + "
" + ], + "text/plain": [ + " gender\n", + "country \n", + "CN 1\n", + "US 1" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3.groupby(['country'])['gender'].nunique().to_frame()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 分组后求和" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agescore
country
CN118288
US62175
\n", + "
" + ], + "text/plain": [ + " age score\n", + "country \n", + "CN 118 288\n", + "US 62 175" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 默认是所有数值类型列求和\n", + "df3.groupby('country').sum() " + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "country\n", + "CN 288\n", + "US 175\n", + "Name: score, dtype: int64" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 指定列求和\n", + "df3.groupby('country')['score'].sum() # 等效于df3.groupby(['country'])['score'].apply(np.sum)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 分组后指定列求和(其他聚合也类似)可以理解成Split, apply, combine\n", + "Image(url=\"http://static.cyub.vip/images/202001/pandas.split-apply-combine.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAENCAYAAAD0eSVZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAUtUlEQVR4nO3df5BV5Z3n8fdXISBKVJS4CsamdjD+QsB0MxpNoriC0SrFSkzpjtFx3GAlmjXZrBV0rZWpjFXZSjJWyM6aIgUDMc6Iu46rO/4YxJgYs/FHSwxEIcomKI0oLTGIjpi0fvePPpArNPTtvt195eH9qrp1z3nOc8793qL59NPPPefcyEwkSWXZp9kFSJIGnuEuSQUy3CWpQIa7JBXIcJekAhnuklSgXsM9IkZGxBMR8cuIeCYi/rpqnxARj0fEmohYEhEfqNpHVOtrqu0tg/sWJEk7qmfk/jYwPTMnA1OAsyPiZOC/ATdn5p8BrwFXVP2vAF6r2m+u+kmShlD05SKmiBgFPAp8AbgX+DeZ2RURpwBzM3NmRPxLtfzziBgGvAyMzd280KGHHpotLS2NvA9J2us89dRTr2bm2J62DavnABGxL/AU8GfA3wH/D/h9ZnZVXTqAcdXyOGAdQBX8m4FDgFd3dfyWlhba29vrKUWSVImIF3a1ra4PVDPzncycAowHpgHHDEBRsyOiPSLaOzs7Gz2cJKlGn86WyczfAw8DpwAHVdMu0B3666vl9cCRANX2A4FNPRxrfma2Zmbr2LE9/lUhSeqnes6WGRsRB1XL+wFnAavoDvnPVN0uA+6ulu+p1qm2/2h38+2SpIFXz5z74cDiat59H+COzPzniHgWuD0i/gb4BbCg6r8AuDUi1gC/Ay4ahLol7YX++Mc/0tHRwdatW5tdypAaOXIk48ePZ/jw4XXv02u4Z+YKYGoP7b+he/59x/atwIV1VyBJdero6GD06NG0tLQQEc0uZ0hkJps2baKjo4MJEybUvZ9XqEraY2zdupVDDjlkrwl2gIjgkEMO6fNfK4a7pD3K3hTs2/TnPRvuklSgui5i0h5m7oHNrqA+czc3uwLt4Vrm3Dugx1v7jXMH9HjN5Mhdkvpo1qxZfPSjH+X4449n/vz5ACxYsICjjz6aadOm8fnPf56rr74agM7OTj796U/T1tZGW1sbP/vZz4akRkfuktRHCxcuZMyYMbz11lu0tbVx7rnn8vWvf53ly5czevRopk+fzuTJkwG45ppr+MpXvsJpp53Giy++yMyZM1m1atWg12i4S1IfzZs3j7vuuguAdevWceutt/LJT36SMWPGAHDhhRfy3HPPAbBs2TKeffbZ7fu+/vrrvPHGGxxwwAGDWqPhLkl98OMf/5hly5bx85//nFGjRnH66adzzDHH7HI0/u677/LYY48xcuTIIa3TOXdJ6oPNmzdz8MEHM2rUKFavXs1jjz3Gm2++yU9+8hNee+01urq6uPPOO7f3nzFjBt/97ne3rz/99NNDUqfhLkl9cPbZZ9PV1cWxxx7LnDlzOPnkkxk3bhzXX38906ZN49RTT6WlpYUDD+w+a23evHm0t7dz4oknctxxx/G9731vSOp0WkbSHqsZpy6OGDGC+++/f6f21tZWZs+eTVdXFxdccAGzZs0C4NBDD2XJkiVDXaYjd0kaCHPnzmXKlCmccMIJTJgwYXu4N4sjd0kaAN/61reaXcJ7OHKXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQZ8tI2nMN9O2th/g21F1dXQwbNjgx7MhdkvrgzTff5Nxzz2Xy5MmccMIJLFmyhCeffJKPfexjTJ48mWnTprFlyxa2bt3K5ZdfzqRJk5g6dSoPP/wwAIsWLeK8885j+vTpnHnmmQ
B885vfpK2tjRNPPJEbb7xxQOp05C5JffDAAw9wxBFHcO+93V8UsnnzZqZOncqSJUtoa2vj9ddfZ7/99uM73/kOEcHKlStZvXo1M2bM2H6nyOXLl7NixQrGjBnD0qVLef7553niiSfITM477zweeeQRPvGJTzRUpyN3SeqDSZMm8eCDD/K1r32Nn/70p7z44oscfvjhtLW1AfDBD36QYcOG8eijj3LJJZcAcMwxx3DUUUdtD/ezzjpr++2Bly5dytKlS5k6dSonnXQSq1ev5vnnn2+4TkfuktQHRx99NMuXL+e+++7jhhtuYPr06X0+xv777799OTO57rrruPLKKweyTEfuktQXL730EqNGjeKSSy7h2muv5fHHH2fDhg08+eSTAGzZsoWuri4+/vGPc9tttwHw3HPP8eKLL/KRj3xkp+PNnDmThQsX8sYbbwCwfv16Nm7c2HCdjtwlqQ9WrlzJtddeyz777MPw4cO55ZZbyEy+9KUv8dZbb7HffvuxbNkyvvjFL/KFL3yBSZMmMWzYMBYtWsSIESN2Ot6MGTNYtWoVp5xyCgAHHHAAP/zhD/nQhz7UUJ2RmQ0dYCC0trZme3t7s8sox0CfHjZYhvi0M+35Vq1axbHHHtvsMpqip/ceEU9lZmtP/Z2WkaQC9RruEXFkRDwcEc9GxDMRcU3VPjci1kfE09XjnJp9rouINRHx64iYOZhvQJK0s3rm3LuAr2bm8ogYDTwVEQ9W227OzPfcxDgijgMuAo4HjgCWRcTRmfnOQBYuSdq1XkfumbkhM5dXy1uAVcC43exyPnB7Zr6dmb8F1gDTBqJYSXo/fE441Prznvs05x4RLcBU4PGq6eqIWBERCyPi4KptHLCuZrcOdv/LQJLqMnLkSDZt2rRXBXxmsmnTJkaOHNmn/eo+FTIiDgDuBL6cma9HxC3A14Gsnr8N/FUfjjcbmA3w4Q9/uC81S9pLjR8/no6ODjo7O5tdypAaOXIk48eP79M+dYV7RAynO9hvy8x/AsjMV2q2fx/452p1PXBkze7jq7b3yMz5wHzoPhWyT1VL2isNHz6cCRMmNLuMPUI9Z8sEsABYlZl/W9N+eE23C4BfVcv3ABdFxIiImABMBJ4YuJIlSb2pZ+R+KvA5YGVEPF21XQ9cHBFT6J6WWQtcCZCZz0TEHcCzdJ9pc5VnykjS0Oo13DPzUSB62HTfbva5CbipgbokSQ3wClVJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUC9hntEHBkRD0fEsxHxTERcU7WPiYgHI+L56vngqj0iYl5ErImIFRFx0mC/CUnSe9Uzcu8CvpqZxwEnA1dFxHHAHOChzJwIPFStA3wKmFg9ZgO3DHjVkqTd6jXcM3NDZi6vlrcAq4BxwPnA4qrbYmBWtXw+8IPs9hhwUEQcPuCVS5J2qU9z7hHRAkwFHgcOy8wN1aaXgcOq5XHAuprdOqo2SdIQqTvcI+IA4E7gy5n5eu22zEwg+/LCETE7Itojor2zs7Mvu0qSelFXuEfEcLqD/bbM/Keq+ZVt0y3V88aqfT1wZM3u46u298jM+ZnZmpmtY8eO7W/9kqQe1HO2TAALgFWZ+bc1m+4BLquWLwPurmm/tDpr5mRgc830jSRpCAyro8+pwOeAlRHxdNV2PfAN4I6IuAJ4Afhste0+4BxgDfCvwOUDWrEkqVe9hntmPgrELjaf2UP/BK5qsC5JUgO8QlWSCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalA9dzyV5IGxtwDm11BfeZubnYFDXPkLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkg
pkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVKBewz0iFkbExoj4VU3b3IhYHxFPV49zarZdFxFrIuLXETFzsAqXJO1aPSP3RcDZPbTfnJlTqsd9ABFxHHARcHy1z/+IiH0HqlhJUn16DffMfAT4XZ3HOx+4PTPfzszfAmuAaQ3UJ0nqh0bm3K+OiBXVtM3BVds4YF1Nn46qTZI0hPob7rcA/xaYAmwAvt3XA0TE7Ihoj4j2zs7OfpYhSepJv8I9M1/JzHcy813g+/xp6mU9cGRN1/FVW0/HmJ+ZrZnZOnbs2P6UIUnahX6Fe0QcXrN6AbDtTJp7gIsiYkRETAAmAk80VqIkqa96/YLsiPhH4HTg0IjoAG4ETo+IKUACa4ErATLzmYi4A3gW6AKuysx3Bqd0SdKu9BrumXlxD80LdtP/JuCmRoqSJDXGK1QlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBWo16/Z05+0zLm32SXUZe3IZlcgqdkcuUtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIK1Gu4R8TCiNgYEb+qaRsTEQ9GxPPV88FVe0TEvIhYExErIuKkwSxektSzekbui4Czd2ibAzyUmROBh6p1gE8BE6vHbOCWgSlTktQXvYZ7Zj4C/G6H5vOBxdXyYmBWTfsPsttjwEERcfhAFStJqk9/59wPy8wN1fLLwGHV8jhgXU2/jqpNkjSEGv5ANTMTyL7uFxGzI6I9Ito7OzsbLUOSVKO/4f7KtumW6nlj1b4eOLKm3/iqbSeZOT8zWzOzdezYsf0sQ5LUk/6G+z3AZdXyZcDdNe2XVmfNnAxsrpm+kSQNkV7v5x4R/wicDhwaER3AjcA3gDsi4grgBeCzVff7gHOANcC/ApcPQs2SpF70Gu6ZefEuNp3ZQ98Ermq0KElSY7xCVZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFWhYswuQ1LiWOfc2u4S6rB3Z7Ar2Ho7cJalAhrskFchwl6QCGe6SVKCGPlCNiLXAFuAdoCszWyNiDLAEaAHWAp/NzNcaK1OS1BcDMXI/IzOnZGZrtT4HeCgzJwIPVeuSpCE0GNMy5wOLq+XFwKxBeA1J0m40Gu4JLI2IpyJidtV2WGZuqJZfBg7raceImB0R7RHR3tnZ2WAZkqRajV7EdFpmro+IDwEPRsTq2o2ZmRGRPe2YmfOB+QCtra099pEk9U9DI/fMXF89bwTuAqYBr0TE4QDV88ZGi5Qk9U2/wz0i9o+I0duWgRnAr4B7gMuqbpcBdzdapCSpbxqZljkMuCsith3nHzLzgYh4ErgjIq4AXgA+23iZkqS+6He4Z+ZvgMk9tG8CzmykKElSY7xCVZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kq0KCFe0ScHRG/jog1ETFnsF5HkrSzQQn3iNgX+DvgU8BxwMURcdxgvJYkaWeDNXKfBqzJzN9k5h+A24HzB+m1JEk7GDZIxx0HrKtZ7wD+vLZDRMwGZlerb0TErweplr1OwKHAq82uo1d/Hc2uQEPMn80Bd9SuNgxWuPcqM+cD85v1+iWLiPbMbG12HdKO/NkcOoM1LbMeOLJmfXzVJkkaAoMV7k8CEyNiQkR8ALgIuGeQXkuStINBmZbJzK6IuBr4F2BfYGFmPjMYr6UeOd2l9y
t/NodIZGaza5AkDTCvUJWkAhnuklQgw12SCmS4S1KBmnYRkxoXEZfubntm/mCoapHqEREHA79Pz+QYdJ4tsweLiO/uYtN5wLjM9Je3miYi/itwR2aujogRwAPAZKAL+PeZuaypBRbOcC9ERATwF8DXgGeBmzJzRXOr0t4sIp4BTsjMrO4ldTHw74CjgcWZOa2pBRbOkd0eLiKGAX8J/GfgMeAzmelN2PR+8Iea6ZeZwO2Z+Q6wqvq51SDyA9U9WERcRfco/aPA2Zn5lwa73kfejogTImIscAawtGbbqCbVtNdwWmYPFhHvAhuBTmCnf8jMPHHIi5IqEfHnwGJgLHBzZv5N1X4O8LnMvLiZ9ZXOcN+DRcRE4DDee+986L4j58uZuWboq5K6RcR/ArbdGD2rx6vAo5n526YVtpdwWmbPdjOwOTNfqH0Am6ttUjONBg6oHqOBDwKtwP0RcVEzC9sbOHLfg0XEk5nZtottKzNz0lDXJPUmIsYAyzLzpGbXUjJH7nu2g3azbb8hq0Lqg8z8HX+artEgMdz3bO0R8fkdGyPiPwBPNaEeqVcRcQbwWrPrKJ3TMnuwiDgMuAv4A38K81bgA8AFmflys2qTImIlO5/FNQZ4Cbg0M1cPfVV7D8O9ANVI6IRq9ZnM/FEz65EAIuKoHZoS2JSZbzajnr2N4S5JBXLOXZIKZLhLUoEMd6kfIuLLEeH9UfS+5Zy71A8RsRZozcxXe9i2b3X3Q6lpHLmrWBFxaUSsiIhfRsStEdESET+q2h6KiA9X/RZFxGdq9nujej49In4cEf8rIlZHxG3R7T8CRwAPR8TD2/aJiG9HxC+B/xIR/7vmeGdFxF1D+ua11/OeyipSRBwP3AB8LDNfrS55X0z3l0Qsjoi/AuYBs3o51FTgeLrPzf4ZcGpmzqtuinVGzch9f+DxzPxq9cUpqyJibGZ2ApcDCwf8TUq74chdpZoO/M9t4Vtd8n4K8A/V9luB0+o4zhOZ2ZGZ7wJPAy276PcOcGf1Wlkd/5KIOKh63fv7+T6kfnHkLnV/p+c+ABGxD91X+G7zds3yO+z6/8zWHebZ/x74P8BWun/JdA1cuVLvHLmrVD8CLoyIQ2D7nQj/L7DtVrN/Afy0Wl5L97dZQfeXiw+v4/hb6L6NbY8y8yW6p3JuoDvopSHlyF1FysxnIuIm4CcR8Q7wC+BLwN9HxLV0f3vV5VX37wN3Vx+GPgDUc3n8fOCBiHgpM8/YRZ/bgLGZuaqR9yL1h6dCSoMkIv478IvMXNDsWrT3MdylQRART9H9F8BZmfl2b/2lgWa4S1KB/EBVkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFej/A3tgqEh0rq73AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.clf()\n", + "df3.groupby('country').sum().plot(kind='bar')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAENCAYAAAD0eSVZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAP8ElEQVR4nO3df6xfdX3H8ecLqswfbIDcdVg6L9GaBdws7g5x+gfoFMRkxURJmZPK2GoW3DRzy9CZ6RJJMJmSsDmSGtBqUOymjG4iioXNsU3wVrFQCrHTEloLvQgizohpee+Pezq+lHt7f3zv937pp89H8s33nM/nc855f8PldU8/95zzTVUhSWrLEcMuQJK08Ax3SWqQ4S5JDTLcJalBhrskNchwl6QGzRjuSX4hye1JvpNka5K/6dpPSnJbku1JPp/k2V37Ud369q5/dLAfQZJ0oNmcuT8OvLaqXg6sBM5OcjrwEeDyqnoJ8AhwUTf+IuCRrv3ybpwkaRFlLjcxJXkucCvwx8CXgF+pqr1JXgV8qKrOSvKVbvm/kywBHgBG6iAHOv7442t0dLSfzyFJh53Nmzc/VFUjU/Utmc0OkhwJbAZeAnwc+B/gR1W1txuyE1jWLS8D7gfogv9R4AXAQ9Ptf3R0lPHx8dmUIknqJLlvur5Z/UG1qvZV1UrgROA04NcWoKi1ScaTjE9MTPS7O0lSjzldLVNVPwJuAV4FHNNNu8Bk6O/qlncBywG6/l8CfjjFvtZV1VhVjY2MTPmvCknSPM3mapmRJMd0y88BXg9sYzLk39INWwNc3y1v7Nbp+m8+2Hy7JGnhzWbO/QRgfTfvfgSwoar+NcndwLVJPgx8G7iqG38V8Jkk24GHgdUDqFuSdBAzhntVbQFOnaL9e0zOvx/Y/jPgrQtSnSRpXrxDVZIaZLhLUoMMd0lq0KxuYtKk0Uu+NOwSmrLjsjcNuwSpWZ65S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgGcM9yfIktyS5O8nWJO/u2j+UZFeSO7rXOT3bvC/J9iT3JjlrkB9AkvR0S2YxZi/w3qr6VpKjgc1Jbur6Lq+qv+0dnORkYDVwCvBC4GtJXlpV+xaycEnS9GY8c6+q3VX1rW75MWAbsOwgm6wCrq2qx6vq+8B24LSFKFaSNDtzmnNPMgqcCtzWNb0ryZYkVyc5tmtbBtzfs9lODv7LQJK0wGYd7kmeD3wBeE9V/Ri4EngxsBLYDXx0LgdOsjbJeJLxiYmJuWwqSZrBrMI9ybOYDPZrquqLAFX1YFXtq6ongE/w5NTLLmB5z+Yndm1PUVXrqmqsqsZGRkb6+QySpAPM5mqZAFcB26rqYz3tJ/QMezNwV7e8EVid5KgkJwErgNsXrmRJ0kxmc7XMq4G3A3cmuaNrez9wfpKVQA
E7gHcCVNXWJBuAu5m80uZir5SRpMU1Y7hX1a1Apui64SDbXApc2kddkqQ+eIeqJDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgGcM9yfIktyS5O8nWJO/u2o9LclOS73bvx3btSXJFku1JtiR5xaA/hCTpqWZz5r4XeG9VnQycDlyc5GTgEmBTVa0ANnXrAG8EVnSvtcCVC161JOmgZgz3qtpdVd/qlh8DtgHLgFXA+m7YeuDcbnkV8Oma9A3gmCQnLHjlkqRpzWnOPckocCpwG7C0qnZ3XQ8AS7vlZcD9PZvt7NokSYtk1uGe5PnAF4D3VNWPe/uqqoCay4GTrE0ynmR8YmJiLptKkmYwq3BP8iwmg/2aqvpi1/zg/umW7n1P174LWN6z+Yld21NU1bqqGquqsZGRkfnWL0mawmyulglwFbCtqj7W07URWNMtrwGu72m/oLtq5nTg0Z7pG0nSIlgyizGvBt4O3Jnkjq7t/cBlwIYkFwH3Aed1fTcA5wDbgZ8CFy5oxZKkGc0Y7lV1K5Bpul83xfgCLu6zLklSH7xDVZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGzeeSvpGe40Uu+NOwSmrLjsjcNu4S+eeYuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoBnDPcnVSfYkuaun7UNJdiW5o3ud09P3viTbk9yb5KxBFS5Jmt5sztw/BZw9RfvlVbWye90AkORkYDVwSrfNPyQ5cqGKlSTNzozhXlVfBx6e5f5WAddW1eNV9X1gO3BaH/VJkuahnzn3dyXZ0k3bHNu1LQPu7xmzs2uTJC2i+Yb7lcCLgZXAbuCjc91BkrVJxpOMT0xMzLMMSdJU5hXuVfVgVe2rqieAT/Dk1MsuYHnP0BO7tqn2sa6qxqpqbGRkZD5lSJKmMa9wT3JCz+qbgf1X0mwEVic5KslJwArg9v5KlCTN1YxfkJ3kc8AZwPFJdgIfBM5IshIoYAfwToCq2ppkA3A3sBe4uKr2DaZ0SdJ0Zgz3qjp/iuarDjL+UuDSfoqSJPXHO1QlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAbNGO5Jrk6yJ8ldPW3HJbkpyXe792O79iS5Isn2JFuSvGKQxUuSpjabM/dPAWcf0HYJsKmqVgCbunWANwIrutda4MqFKVOSNBczhntVfR14+IDmVcD6bnk9cG5P+6dr0jeAY5KcsFDFSpJmZ75z7kurane3/ACwtFteBtzfM25n1yZJWkR9/0G1qgqouW6XZG2S8STjExMT/ZYhSeox33B/cP90S/e+p2vfBSzvGXdi1/Y0VbWuqsaqamxkZGSeZUiSpjLfcN8IrOmW1wDX97Rf0F01czrwaM/0jSRpkSyZaUCSzwFnAMcn2Ql8ELgM2JDkIuA+4Lxu+A3AOcB24KfAhQOoWZI0gxnDvarOn6brdVOMLeDifouSJPXHO1QlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYt6WfjJDuAx4B9wN6qGktyHP
B5YBTYAZxXVY/0V6YkaS4W4sz9zKpaWVVj3folwKaqWgFs6tYlSYtoENMyq4D13fJ64NwBHEOSdBD9hnsBX02yOcnarm1pVe3ulh8Alk61YZK1ScaTjE9MTPRZhiSpV19z7sBrqmpXkl8GbkpyT29nVVWSmmrDqloHrAMYGxubcowkaX76OnOvql3d+x7gOuA04MEkJwB073v6LVKSNDfzDvckz0ty9P5l4A3AXcBGYE03bA1wfb9FSpLmpp9pmaXAdUn27+ezVXVjkm8CG5JcBNwHnNd/mZKkuZh3uFfV94CXT9H+Q+B1/RQlSeqPd6hKUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0aWLgnOTvJvUm2J7lkUMeRJD3dQMI9yZHAx4E3AicD5yc5eRDHkiQ93aDO3E8DtlfV96rq58C1wKoBHUuSdIAlA9rvMuD+nvWdwCt7ByRZC6ztVn+S5N4B1XI4Oh54aNhFzCQfGXYFGgJ/NhfWi6brGFS4z6iq1gHrhnX8liUZr6qxYdchHcifzcUzqGmZXcDynvUTuzZJ0iIYVLh/E1iR5KQkzwZWAxsHdCxJ0gEGMi1TVXuTvAv4CnAkcHVVbR3EsTQlp7v0TOXP5iJJVQ27BknSAvMOVUlqkOEuSQ0y3CWpQYa7JDVoaDcxqX9JLjhYf1V9erFqkWYjybHAj8orOQbOq2UOYUn+bpqu3wWWVZW/vDU0Sf4a2FBV9yQ5CrgReDmwF/i9qvraUAtsnOHeiCQB3gb8JXA3cGlVbRluVTqcJdkKvKyqqnuW1PnA7wAvBdZX1WlDLbBxntkd4pIsAd4B/DnwDeAtVeVD2PRM8POe6ZezgGurah+wrfu51QD5B9VDWJKLmTxL/03g7Kp6h8GuZ5DHk7wsyQhwJvDVnr7nDqmmw4bTMoewJE8Ae4AJ4Gn/IavqNxa9KKmT5JXAemAEuLyqPty1nwO8varOH2Z9rTPcD2FJVgBLeeqz82HyiZwPVNX2xa9KmpTkz4B0q9W9HgJurarvD62ww4TTMoe2y4FHq+q+3hfwaNcnDdPRwPO719HALwJjwJeTrB5mYYcDz9wPYUm+WVW/NU3fnVX164tdkzSTJMcBX6uqVwy7lpZ55n5oO+Ygfc9ZtCqkOaiqh3lyukYDYrgf2saT/NGBjUn+ENg8hHqkGSU5E3hk2HW0zmmZQ1iSpcB1wM95MszHgGcDb66qB4ZVm5TkTp5+FddxwA+AC6rqnsWv6vBhuDegOxN6Wbe6tapuHmY9EkCSFx3QVMAPq+p/h1HP4cZwl6QGOecuSQ0y3CWpQYa7NA9J3pPE56PoGcs5d2kekuwAxqrqoSn6juyefigNjWfualaSC5JsSfKdJJ9JMprk5q5tU5Jf7cZ9Kslberb7Sfd+RpJ/S/JPSe5Jck0m/SnwQuCWJLfs3ybJR5N8B/irJP/cs7/XJ7luUT+8Dns+U1lNSnIK8AHgt6vqoe6W9/VMfknE+iR/AFwBnDvDrk4FTmHy2uz/BF5dVVd0D8U6s+fM/XnAbVX13u6LU7YlGamqCeBC4OoF/5DSQXjmrla9FvjH/eHb3fL+KuCzXf9ngNfMYj+3V9XOqnoCuAMYnWbcPuAL3bGq2//vJzmmO+6X5/k5pHnxzF2a/E7PIwCSHMHkHb77Pd6zvI/p/5/52QHz7J8E/gX4GZO/ZPYuXLnSzDxzV6tuBt6a5AXw/08i/C9g/6Nm3wb8R7e8g8lvs4LJLxd/1iz2/xiTj7GdUlX9gMmpnA8wGfTSovLMXU2qqq1JLgX+Pck+4NvAnwCfTPIXTH571YXd8E8A13d/DL0RmM3t8euAG5P8oKrOnGbMNcBIVW3r57NI8+GlkNKAJP
l74NtVddWwa9Hhx3CXBiDJZib/BfD6qnp8pvHSQjPcJalB/kFVkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNej/AIl4KMTyIG3lAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df3.groupby('country')['score'].sum().plot(kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 分组后求平均值,最大值,最小值" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
score
minmaxmean
country
CN579472.000000
US258858.333333
\n", + "
" + ], + "text/plain": [ + " score \n", + " min max mean\n", + "country \n", + "CN 57 94 72.000000\n", + "US 25 88 58.333333" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3.groupby('country').agg({'score':['min','max','mean']})" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aminamaxmean
country
CN579472.000000
US258858.333333
\n", + "
" + ], + "text/plain": [ + " amin amax mean\n", + "country \n", + "CN 57 94 72.000000\n", + "US 25 88 58.333333" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 跟上面效果一致\n", + "df3.groupby('country')['score'].agg([np.min, np.max, np.mean])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 分组后不同列使用不同求值函数" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
scoreage
maxminstdsumcountmax
country
CN945715.684387118436
US882531.65964862324
\n", + "
" + ], + "text/plain": [ + " score age \n", + " max min std sum count max\n", + "country \n", + "CN 94 57 15.684387 118 4 36\n", + "US 88 25 31.659648 62 3 24" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3.groupby('country').agg({'score': ['max','min', 'std'],\n", + " 'age': ['sum', 'count', 'max']})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 多个分组结果拼接" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
scoreage
country
CN72.000000118
US58.33333362
\n", + "
" + ], + "text/plain": [ + " score age\n", + "country \n", + "CN 72.000000 118\n", + "US 58.333333 62" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t1=df3.groupby('country')['score'].mean().to_frame()\n", + "t2 = df3.groupby('country')['age'].sum().to_frame()\n", + "\n", + "t1.merge(t2,left_index=True,right_index=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 遍历分组" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CN\n", + " name age gender score country\n", + "1 Molly 32 F 94 CN\n", + "2 Tina 36 F 57 CN\n", + "4 Amy 23 F 70 CN\n", + "6 Tim 27 F 67 CN\n", + "US\n", + " name age gender score country\n", + "0 Jason 20 F 25 US\n", + "3 Jake 24 F 62 US\n", + "5 Jack 18 F 88 US\n" + ] + } + ], + "source": [ + "grouped = df3.groupby('country')\n", + "for name,group in grouped:\n", + " print(name)\n", + " print(group)" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('CN', 'F')\n", + " name age gender score country\n", + "1 Molly 32 F 94 CN\n", + "2 Tina 36 F 57 CN\n", + "4 Amy 23 F 70 CN\n", + "6 Tim 27 F 67 CN\n", + "('US', 'F')\n", + " name age gender score country\n", + "0 Jason 20 F 25 US\n", + "3 Jake 24 F 62 US\n", + "5 Jack 18 F 88 US\n" + ] + } + ], + "source": [ + "grouped = df3.groupby(['country', 'gender'])\n", + "for name,group in grouped:\n", + " print(name)\n", + " print(group)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 获取分组信息" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'CN': Int64Index([1, 2, 4, 6], dtype='int64'),\n", + " 'US': Int64Index([0, 3, 5], dtype='int64')}" + ] + }, + "execution_count": 83, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3.groupby('country').groups" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 取分组后的某一组" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameagegenderscorecountry
1Molly32F94CN
2Tina36F57CN
4Amy23F70CN
6Tim27F67CN
\n", + "
" + ], + "text/plain": [ + " name age gender score country\n", + "1 Molly 32 F 94 CN\n", + "2 Tina 36 F 57 CN\n", + "4 Amy 23 F 70 CN\n", + "6 Tim 27 F 67 CN" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3.groupby('country').get_group('CN')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 分组后过滤" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameagegenderscorecountry
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [name, age, gender, score, country]\n", + "Index: []" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3.groupby('name').filter(lambda x: len(x) >= 3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 数据透视" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
scoreage
genderFF
name
Amy7023
Jack8818
Jake6224
Jason2520
Molly9432
Tim6727
Tina5736
\n", + "
" + ], + "text/plain": [ + " score age\n", + "gender F F\n", + "name \n", + "Amy 70 23\n", + "Jack 88 18\n", + "Jake 62 24\n", + "Jason 25 20\n", + "Molly 94 32\n", + "Tim 67 27\n", + "Tina 57 36" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 数据透视的值项只能是数值类型\n", + "# pivot(index =,columns=,values=):透视数据\n", + "# index:透视的列(作为索引, 且值都是唯一的); columns-用于进一步细分index;values查看具体值\n", + "\n", + "df3.pivot(index ='name',columns='gender',values=['score','age'])" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
score
countrygender
CNF288
USF175
\n", + "
" + ], + "text/plain": [ + " score\n", + "country gender \n", + "CN F 288\n", + "US F 175" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# pivot_table(index =,columns=,values=):透视数据\n", + "# index:透视的列(作为索引, 且值都是唯一的); columns-用于进一步细分index;values查看具体值;fill_value:0-用0替换Nan; margins:True-汇总\n", + "\n", + "pd.pivot_table(df3,index=['country', 'gender'], values=['score'],aggfunc=np.sum)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summean
agescoreagescore
countrygender
CNF11828829.50000072.000000
USF6217520.66666758.333333
All18046325.71428666.142857
\n", + "
" + ], + "text/plain": [ + " sum mean \n", + " age score age score\n", + "country gender \n", + "CN F 118 288 29.500000 72.000000\n", + "US F 62 175 20.666667 58.333333\n", + "All 180 463 25.714286 66.142857" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.pivot_table(df3,index=['country', 'gender'], values=['score', 'age'],aggfunc=[np.sum, np.mean],fill_value=0,margins=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameagegenderscorecountry
0Jason20F25US
1Molly32F94CN
2Tina36F57CN
3Jake24F62US
4Amy23F70CN
5Jack18F88US
6Tim27F67CN
\n", + "
" + ], + "text/plain": [ + " name age gender score country\n", + "0 Jason 20 F 25 US\n", + "1 Molly 32 F 94 CN\n", + "2 Tina 36 F 57 CN\n", + "3 Jake 24 F 62 US\n", + "4 Amy 23 F 70 CN\n", + "5 Jack 18 F 88 US\n", + "6 Tim 27 F 67 CN" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 合并、连接、拼接(Merge, join, and concatenate)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 拼接(concatenate)" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-----t1----\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0A0B0C0D0
1A1B1C1D1
2A2B2C2D2
3A3B3C3D3
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 A0 B0 C0 D0\n", + "1 A1 B1 C1 D1\n", + "2 A2 B2 C2 D2\n", + "3 A3 B3 C3 D3" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----t2-----\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
4A4B4C4D4
5A5B5C5D5
6A6B6C6D6
7A7B7C7D7
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "4 A4 B4 C4 D4\n", + "5 A5 B5 C5 D5\n", + "6 A6 B6 C6 D6\n", + "7 A7 B7 C7 D7" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-----t3----\n", + " A B C D\n", + "4 A4 B4 C4 D4\n", + "5 A5 B5 C5 D5\n", + "6 A6 B6 C6 D6\n", + "7 A7 B7 C7 D7\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0A0B0C0D0
1A1B1C1D1
2A2B2C2D2
3A3B3C3D3
4A4B4C4D4
5A5B5C5D5
6A6B6C6D6
7A7B7C7D7
8A8B8C8D8
9A9B9C9D9
10A10B10C10D10
11A11B11C11D11
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 A0 B0 C0 D0\n", + "1 A1 B1 C1 D1\n", + "2 A2 B2 C2 D2\n", + "3 A3 B3 C3 D3\n", + "4 A4 B4 C4 D4\n", + "5 A5 B5 C5 D5\n", + "6 A6 B6 C6 D6\n", + "7 A7 B7 C7 D7\n", + "8 A8 B8 C8 D8\n", + "9 A9 B9 C9 D9\n", + "10 A10 B10 C10 D10\n", + "11 A11 B11 C11 D11" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],\n", + " 'B': ['B0', 'B1', 'B2', 'B3'],\n", + " 'C': ['C0', 'C1', 'C2', 'C3'],\n", + " 'D': ['D0', 'D1', 'D2', 'D3']},\n", + " index=[0, 1, 2, 3])\n", + "print('-----t1----')\n", + "display(t1)\n", + "\n", + "t2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],\n", + " 'B': ['B4', 'B5', 'B6', 'B7'],\n", + " 'C': ['C4', 'C5', 'C6', 'C7'],\n", + " 'D': ['D4', 'D5', 'D6', 'D7']},\n", + " index=[4, 5, 6, 7])\n", + "\n", + "print('----t2-----')\n", + "display(t2)\n", + "\n", + "t3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],\n", + " 'B': ['B8', 'B9', 'B10', 'B11'],\n", + " 'C': ['C8', 'C9', 'C10', 'C11'],\n", + " 'D': ['D8', 'D9', 'D10', 'D11']},\n", + " index=[8, 9, 10, 11])\n", + "\n", + "print('-----t3----')\n", + "print(t2)\n", + "frames = [t1, t2, t3]\n", + "\n", + "pd.concat(frames)" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# concat类似:linux的split命令把文件分成多个,然后在拼接成一个完成文件\n", + "\n", + "Image(url=\"http://static.cyub.vip/images/202001/pandas.concat.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-----t4----\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDBDF
0A0B0C0D0NaNNaNNaN
1A1B1C1D1NaNNaNNaN
2A2B2C2D2B2D2F2
3A3B3C3D3B3D3F3
6NaNNaNNaNNaNB6D6F6
7NaNNaNNaNNaNB7D7F7
\n", + "
" + ], + "text/plain": [ + " A B C D B D F\n", + "0 A0 B0 C0 D0 NaN NaN NaN\n", + "1 A1 B1 C1 D1 NaN NaN NaN\n", + "2 A2 B2 C2 D2 B2 D2 F2\n", + "3 A3 B3 C3 D3 B3 D3 F3\n", + "6 NaN NaN NaN NaN B6 D6 F6\n", + "7 NaN NaN NaN NaN B7 D7 F7" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t4 = pd.DataFrame({'B': ['B2', 'B3', 'B6', 'B7'],\n", + " 'D': ['D2', 'D3', 'D6', 'D7'],\n", + " 'F': ['F2', 'F3', 'F6', 'F7']},\n", + " index=[2, 3, 6, 7])\n", + "\n", + "print('-----t4----')\n", + "\n", + "pd.concat([t1, t4], axis=1, sort=False) # 此时相当于out joiner" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image(url=\"http://static.cyub.vip/images/202001/pandas.concat.outer_join.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDBDF
2A2B2C2D2B2D2F2
3A3B3C3D3B3D3F3
\n", + "
" + ], + "text/plain": [ + " A B C D B D F\n", + "2 A2 B2 C2 D2 B2 D2 F2\n", + "3 A3 B3 C3 D3 B3 D3 F3" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([t1, t4], axis=1, join='inner')" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image(url=\"http://static.cyub.vip/images/202001/pandas.concat.inner_join.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0A0B0C0D0
1A1B1C1D1
2A2B2C2D2
3A3B3C3D3
4A4B4C4D4
5A5B5C5D5
6A6B6C6D6
7A7B7C7D7
8A8B8C8D8
9A9B9C9D9
10A10B10C10D10
11A11B11C11D11
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 A0 B0 C0 D0\n", + "1 A1 B1 C1 D1\n", + "2 A2 B2 C2 D2\n", + "3 A3 B3 C3 D3\n", + "4 A4 B4 C4 D4\n", + "5 A5 B5 C5 D5\n", + "6 A6 B6 C6 D6\n", + "7 A7 B7 C7 D7\n", + "8 A8 B8 C8 D8\n", + "9 A9 B9 C9 D9\n", + "10 A10 B10 C10 D10\n", + "11 A11 B11 C11 D11" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t1.append([t2,t3]) # 相当于pd.concat([t1, t2, t3])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 连接(Join)\n", + "\n", + "join(on=None, how='left', lsuffix='', rsuffix='', sort=False)\n", + "\n", + "on:join的键,默认是矩阵的index, how:join方式,left-相当于左连接,outer,inner\n", + "\n", + "更多查看[Database-style DataFrame or named Series joining/merging](https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#database-style-dataframe-or-named-series-joining-merging)\n", + "\n", + "[Combining Datasets: Merge and Join](https://jakevdp.github.io/PythonDataScienceHandbook/03.07-merge-and-join.html)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----left----\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AB
K0A0B0
K1A1B1
K2A2B2
\n", + "
" + ], + "text/plain": [ + " A B\n", + "K0 A0 B0\n", + "K1 A1 B1\n", + "K2 A2 B2" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---right----\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CD
K0C0D0
K2C2D2
K3C3D3
\n", + "
" + ], + "text/plain": [ + " C D\n", + "K0 C0 D0\n", + "K2 C2 D2\n", + "K3 C3 D3" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
K0A0B0C0D0
K1A1B1NaNNaN
K2A2B2C2D2
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "K0 A0 B0 C0 D0\n", + "K1 A1 B1 NaN NaN\n", + "K2 A2 B2 C2 D2" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],\n", + " 'B': ['B0', 'B1', 'B2']},\n", + " index=['K0', 'K1', 'K2'])\n", + "\n", + "print('----left----')\n", + "display(left)\n", + "\n", + "right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],\n", + " 'D': ['D0', 'D2', 'D3']},\n", + " index=['K0', 'K2', 'K3'])\n", + "print('---right----')\n", + "display(right)\n", + "\n", + "left.join(right) # 相当于 pd.merge(left, right, left_index=True, right_index=True, how='left')" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image(url=\"http://static.cyub.vip/images/202001/pandas.join.left.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
K0A0B0C0D0
K1A1B1NaNNaN
K2A2B2C2D2
K3NaNNaNC3D3
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "K0 A0 B0 C0 D0\n", + "K1 A1 B1 NaN NaN\n", + "K2 A2 B2 C2 D2\n", + "K3 NaN NaN C3 D3" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left.join(right, how='outer') # 相当于pd.merge(left, right, left_index=True, right_index=True, how='outer')" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image(url=\"http://static.cyub.vip/images/202001/pandas.join.outer.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
K0A0B0C0D0
K2A2B2C2D2
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "K0 A0 B0 C0 D0\n", + "K2 A2 B2 C2 D2" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left.join(right, how='inner') #相当于pd.merge(left, right, left_index=True, right_index=True, how='inner')" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image(url=\"http://static.cyub.vip/images/202001/pandas.join.inner.png\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 根据某一列进行join\n", + "\n", + "left.join(right, on=key_or_keys)= pd.merge(left, right, left_on=key_or_keys, right_index=True,\n", + " how='left', sort=False) // 使用left矩阵的key_or_keys列与right矩阵的index进行join" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----left----\n", + " A B key\n", + "0 A0 B0 K0\n", + "1 A1 B1 K1\n", + "2 A2 B2 K0\n", + "3 A3 B3 K1\n", + "----right----\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CD
K0C0D0
K1C1D1
\n", + "
" + ], + "text/plain": [ + " C D\n", + "K0 C0 D0\n", + "K1 C1 D1" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABkeyCD
0A0B0K0C0D0
1A1B1K1C1D1
2A2B2K0C0D0
3A3B3K1C1D1
\n", + "
" + ], + "text/plain": [ + " A B key C D\n", + "0 A0 B0 K0 C0 D0\n", + "1 A1 B1 K1 C1 D1\n", + "2 A2 B2 K0 C0 D0\n", + "3 A3 B3 K1 C1 D1" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],\n", + " 'B': ['B0', 'B1', 'B2', 'B3'],\n", + " 'key': ['K0', 'K1', 'K0', 'K1']})\n", + "\n", + "print('----left----')\n", + "print(left)\n", + "\n", + "right = pd.DataFrame({'C': ['C0', 'C1'],\n", + " 'D': ['D0', 'D1']},\n", + " index=['K0', 'K1'])\n", + "\n", + "print('----right----')\n", + "display(right)\n", + "\n", + "\n", + "left.join(right, on='key') # 相当于pd.merge(left, right, left_on='key', right_index=True,how='left', sort=False);" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image(url=\"http://static.cyub.vip/images/202001/pandas.join.key.left.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----left----\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABkey1key2
0A0B0K0K0
1A1B1K0K1
2A2B2K1K0
3A3B3K2K1
\n", + "
" + ], + "text/plain": [ + " A B key1 key2\n", + "0 A0 B0 K0 K0\n", + "1 A1 B1 K0 K1\n", + "2 A2 B2 K1 K0\n", + "3 A3 B3 K2 K1" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----right----\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CD
K0K0C0D0
K1K0C1D1
K2K0C2D2
K3K11C3D3
\n", + "
" + ], + "text/plain": [ + " C D\n", + "K0 K0 C0 D0\n", + "K1 K0 C1 D1\n", + "K2 K0 C2 D2\n", + "K3 K11 C3 D3" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABkey1key2CD
0A0B0K0K0C0D0
1A1B1K0K1NaNNaN
2A2B2K1K0C1D1
3A3B3K2K1NaNNaN
\n", + "
" + ], + "text/plain": [ + " A B key1 key2 C D\n", + "0 A0 B0 K0 K0 C0 D0\n", + "1 A1 B1 K0 K1 NaN NaN\n", + "2 A2 B2 K1 K0 C1 D1\n", + "3 A3 B3 K2 K1 NaN NaN" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#### 多列的join\n", + "\n", + "left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],\n", + " 'B': ['B0', 'B1', 'B2', 'B3'],\n", + " 'key1': ['K0', 'K0', 'K1', 'K2'],\n", + " 'key2': ['K0', 'K1', 'K0', 'K1']})\n", + "\n", + "print('----left----')\n", + "display(left)\n", + "\n", + "index = pd.MultiIndex.from_tuples([('K0', 'K0'), ('K1', 'K0'),\n", + " ('K2', 'K0'), ('K3', 'K11')])\n", + "\n", + "\n", + "right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'],\n", + " 'D': ['D0', 'D1', 'D2', 'D3']},\n", + " index=index)\n", + "\n", + "print('----right----')\n", + "display(right)\n", + "\n", + "left.join(right, on=['key1', 'key2'])" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image(url=\"http://static.cyub.vip/images/202001/pandas.join.keys.left.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABkey1key2CD
0A0B0K0K0C0D0
2A2B2K1K0C1D1
\n", + "
" + ], + "text/plain": [ + " A B key1 key2 C D\n", + "0 A0 B0 K0 K0 C0 D0\n", + "2 A2 B2 K1 K0 C1 D1" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left.join(right, on=['key1', 'key2'], how='inner')" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image(url=\"http://static.cyub.vip/images/202001/pandas.join.keys.inner.png\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 数据导入导出" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 从csv中导入数据" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
id日期游戏id游戏名称国家国家码下载数下载用户数成功下载数成功下载用户数安装数安装用户数
075643162020-01-271Uphill Rush Water Park Racing俄罗斯RU111111
175643172020-01-271Uphill Rush Water Park Racing肯尼亚KE222200
275643182020-01-271Uphill Rush Water Park Racing刚果金CD110000
375643192020-01-271Uphill Rush Water Park Racing尼泊尔NP110000
475643202020-01-271Uphill Rush Water Park Racing索马里SO111111
.......................................
17988680104812020-02-02175Soccer Star 2022 World Legend: Football game赞比亚ZM220000
17988780104822020-02-02175Soccer Star 2022 World Legend: Football game尼日利亚NG112222
17988880104832020-02-02175Soccer Star 2022 World Legend: Football game埃及EG220000
17988980104842020-02-02175Soccer Star 2022 World Legend: Football game科特迪瓦CI332222
17989080104852020-02-02175Soccer Star 2022 World Legend: Football game约旦JO110000
\n", + "

179891 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " id 日期 游戏id \\\n", + "0 7564316 2020-01-27 1 \n", + "1 7564317 2020-01-27 1 \n", + "2 7564318 2020-01-27 1 \n", + "3 7564319 2020-01-27 1 \n", + "4 7564320 2020-01-27 1 \n", + "... ... ... ... \n", + "179886 8010481 2020-02-02 175 \n", + "179887 8010482 2020-02-02 175 \n", + "179888 8010483 2020-02-02 175 \n", + "179889 8010484 2020-02-02 175 \n", + "179890 8010485 2020-02-02 175 \n", + "\n", + " 游戏名称 国家 国家码 下载数 下载用户数 \\\n", + "0 Uphill Rush Water Park Racing 俄罗斯 RU 1 1 \n", + "1 Uphill Rush Water Park Racing 肯尼亚 KE 2 2 \n", + "2 Uphill Rush Water Park Racing 刚果金 CD 1 1 \n", + "3 Uphill Rush Water Park Racing 尼泊尔 NP 1 1 \n", + "4 Uphill Rush Water Park Racing 索马里 SO 1 1 \n", + "... ... ... .. ... ... \n", + "179886 Soccer Star 2022 World Legend: Football game 赞比亚 ZM 2 2 \n", + "179887 Soccer Star 2022 World Legend: Football game 尼日利亚 NG 1 1 \n", + "179888 Soccer Star 2022 World Legend: Football game 埃及 EG 2 2 \n", + "179889 Soccer Star 2022 World Legend: Football game 科特迪瓦 CI 3 3 \n", + "179890 Soccer Star 2022 World Legend: Football game 约旦 JO 1 1 \n", + "\n", + " 成功下载数 成功下载用户数 安装数 安装用户数 \n", + "0 1 1 1 1 \n", + "1 2 2 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 1 1 1 1 \n", + "... ... ... ... ... 
\n", + "179886 0 0 0 0 \n", + "179887 2 2 2 2 \n", + "179888 0 0 0 0 \n", + "179889 2 2 2 2 \n", + "179890 0 0 0 0 \n", + "\n", + "[179891 rows x 12 columns]" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.read_csv('../dataset/game_daily_stats_20200127_20200202.csv', names=['id', '日期', '游戏id', '游戏名称', '国家', '国家码', '下载数', '下载用户数', '成功下载数', '成功下载用户数','安装数', '安装用户数'],na_filter = False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 导出数据到csv" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv('/tmp/pandas.csv', encoding=\"utf_8_sig\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/jupyter/Spark上手示例1:RDD操作.ipynb b/docs/jupyter/Spark上手示例1:RDD操作.ipynb new file mode 100644 index 0000000..b0da35a --- /dev/null +++ b/docs/jupyter/Spark上手示例1:RDD操作.ipynb @@ -0,0 +1,610 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# 引入pyspark,并创建spark上下文\n", + "import findspark\n", + "findspark.init()\n", + "import pyspark\n", + "sc = pyspark.SparkContext()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. 
创建RDD的第一种方式,读外部数据,比如本地磁盘文件" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "rdd = sc.textFile('./dataset/Goodbye_Object_Oriented_Programming.txt')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pyspark.rdd.RDD" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 查看rdd类型\n", + "type(rdd)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.1 RDD之转换(Transformation)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", + "Wall time: 31 µs\n" + ] + } + ], + "source": [ + "%%time\n", + "## map是转换操作的一种,这时候只是形成DAG\n", + "rdd = rdd.map(lambda x: len(x))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.2 RDD之行动(Action)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13187\n", + "CPU times: user 12 ms, sys: 0 ns, total: 12 ms\n", + "Wall time: 1.58 s\n" + ] + } + ], + "source": [ + "%%time\n", + "## reduce是行动操作的一种, 这个时候才真正的计算\n", + "charCount = rdd.reduce(lambda x, y: x+y)\n", + "\n", + "print(charCount)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 328 2260 13687 ./dataset/Goodbye_Object_Oriented_Programming.txt\r\n" + ] + } + ], + "source": [ + "! 
wc ./dataset/Goodbye_Object_Oriented_Programming.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.3 示例:统计单词出现的次数" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['I’ve been programming in Object Oriented languages for decades. The first OO language I used was C++ and then Smalltalk and finally .NET and Java.',\n", + " '']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wordRdd = sc.textFile('./dataset/Goodbye_Object_Oriented_Programming.txt')\n", + "\n", + "# take操作就是一种Action, 返回前n数据\n", + "wordRdd.take(2) " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# 将每一行文本打散\n", + "wordRdd = wordRdd.map(lambda line: line.split(' '))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['I’ve',\n", + " 'been',\n", + " 'programming',\n", + " 'in',\n", + " 'Object',\n", + " 'Oriented',\n", + " 'languages',\n", + " 'for',\n", + " 'decades.',\n", + " 'The',\n", + " 'first',\n", + " 'OO',\n", + " 'language',\n", + " 'I',\n", + " 'used',\n", + " 'was',\n", + " 'C++',\n", + " 'and',\n", + " 'then',\n", + " 'Smalltalk',\n", + " 'and',\n", + " 'finally',\n", + " '.NET',\n", + " 'and',\n", + " 'Java.'],\n", + " ['']]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wordRdd.take(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2493" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 扁平化处理\n", + "\n", + "wordRdd = wordRdd.flatMap(lambda x: x)\n", + "\n", + "# 查看有多少个单词\n", + "wordRdd.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['I’ve', 'been']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 查看前两条数据\n", + "wordRdd.take(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2260" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 过滤掉空格数据\n", + "wordRdd = wordRdd.filter(lambda x: x != '')\n", + "\n", + "# 查看有多少个单词\n", + "wordRdd.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('I’ve', 1), ('been', 1)]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 转换成key-value形式rdd 即 (key, value)\n", + "wordRdd = wordRdd.map(lambda word: (word, 1))\n", + "\n", + "wordRdd.take(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('face', 1),\n", + " ('was', 18),\n", + " ('Monkey', 2),\n", + " ('how', 4),\n", + " ('Just', 1),\n", + " ('for', 11),\n", + " ('Directories', 1),\n", + " ('could', 4),\n", + " ('gained', 1),\n", + " ('AGAIN', 1)]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wordRdd = wordRdd.reduceByKey(lambda x, y: x+y)\n", + "\n", + "# 查看一下\n", + "wordRdd.take(10)\n", + "\n", + "# 查看全部\n", + "# wordRdd.collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# 使用pandas继续计算\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wordcount
0face1
1was18
2Monkey2
3how4
4Just1
5for11
6Directories1
7could4
8gained1
9AGAIN1
\n", + "
" + ], + "text/plain": [ + " word count\n", + "0 face 1\n", + "1 was 18\n", + "2 Monkey 2\n", + "3 how 4\n", + "4 Just 1\n", + "5 for 11\n", + "6 Directories 1\n", + "7 could 4\n", + "8 gained 1\n", + "9 AGAIN 1" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(wordRdd.collect())\n", + "\n", + "# 设置栏位名字\n", + "df.columns = ['word', 'count']\n", + "\n", + "\n", + "# 查看前10条数据\n", + "df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wordcount
263the121
271to57
576of47
358and45
589a41
797is38
136in35
593I32
685that29
645The26
\n", + "
" + ], + "text/plain": [ + " word count\n", + "263 the 121\n", + "271 to 57\n", + "576 of 47\n", + "358 and 45\n", + "589 a 41\n", + "797 is 38\n", + "136 in 35\n", + "593 I 32\n", + "685 that 29\n", + "645 The 26" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 查看出现次数最多的十个单词\n", + "df =df.sort_values(by='count', ascending=False)\n", + "\n", + "\n", + "df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# 停止spark上下文\n", + "sc.stop()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/jupyter/Spark上手示例2:DataFrame操作.ipynb b/docs/jupyter/Spark上手示例2:DataFrame操作.ipynb new file mode 100644 index 0000000..8fd8250 --- /dev/null +++ b/docs/jupyter/Spark上手示例2:DataFrame操作.ipynb @@ -0,0 +1,1028 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/vnd.plotly.v1+html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 导入相关库\n", + "from pyspark.sql import Row, SparkSession,SQLContext\n", + "from pyspark.sql.types import IntegerType,DateType, TimestampType\n", + "from pyspark.sql.functions import col, udf,to_date,from_unixtime,countDistinct\n", + "\n", + "# 计算处理\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "import time\n", + "\n", + "# 图表相关\n", + "import plotly.plotly as py\n", + "import plotly\n", + "plotly.offline.init_notebook_mode(connected=True)\n", + "import plotly.graph_objs as go\n", + 
"\n", + "# jupyter使用matplot的配置\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# 创建spark上下文,并设置10个分区\n", + "spark = SparkSession.builder.appName(\"vas项目\").config(\"spark.default.parallelism\", 10).getOrCreate()\n", + "sc = spark.sparkContext" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 8 ms, sys: 0 ns, total: 8 ms\n", + "Wall time: 41.3 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "logPaths = ['/var/log/vas-project/vas_data/201807',\n", + " '/var/log/vas-project/vas_data/201808', \n", + " '/var/log/vas-project/vas_data/201809',\n", + " '/var/log/vas-project/vas_data/201810'\n", + " ];\n", + "df = spark.read.format('json').load(logPaths)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----+------------+------+--------------------+---------------+-----+-----------+---+\n", + "|brand|country_code|device| events| ip_address|model| partner|ref|\n", + "+-----+------------+------+--------------------+---------------+-----+-----------+---+\n", + "| Itel| ML| sp|[[click, 15358391...| 217.64.103.74| P13|searchturbo| m|\n", + "| Itel| EG| sp|[[click, 15358391...|196.141.135.133| A32F|searchturbo| m|\n", + "| Itel| NG| sp|[[click, 15358391...| 197.210.226.58| P32|searchturbo| m|\n", + "| Itel| IN| sp|[[click, 15358391...| 157.48.123.237| A22|searchturbo| m|\n", + "| Itel| EG| sp|[[click, 15358391...| 105.199.93.33| A32F|searchturbo| m|\n", + "| Itel| EG| sp|[[click, 15358391...|196.141.135.133| A32F|searchturbo| m|\n", + "| Itel| MA| sp|[[click, 15358391...| 41.249.147.213| A32F|searchturbo| m|\n", + "| Itel| CI| sp|[[click, 15358391...| 154.0.26.115| P32| Unknown| m|\n", + "| Itel| BJ| sp|[[click, 15358391...|197.234.221.243| 
A32F|searchturbo| m|\n", + "| Itel| EG| sp|[[click, 15358391...|196.141.135.133| A32F|searchturbo| m|\n", + "+-----+------------+------+--------------------+---------------+-----+-----------+---+\n", + "\n", + "CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", + "Wall time: 11.6 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "# 查看前10条数据\n", + "\n", + "df.limit(10).show()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", + "Wall time: 6.31 s\n" + ] + }, + { + "data": { + "text/plain": [ + "2075513" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "\n", + "# 查看总共记录数\n", + "\n", + "df.count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 例1. 按品牌机型统计" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----+---------+------+\n", + "|brand| model| count|\n", + "+-----+---------+------+\n", + "| Itel| A52B| 19|\n", + "| Itel| A14| 10136|\n", + "| Itel| S13 Pro| 151|\n", + "| Itel| A16 Plus| 229|\n", + "| Itel| A52| 12812|\n", + "| Itel| A45| 68690|\n", + "| Itel| A22| 69811|\n", + "| Itel| A16| 4210|\n", + "| Itel| S11X| 27366|\n", + "| Itel| A62| 11161|\n", + "|Spice| Z213| 77393|\n", + "| Itel| S11XB| 137|\n", + "| Itel| A15| 11744|\n", + "| Itel| P32|550753|\n", + "| Itel| P13 Plus| 176|\n", + "| Itel|A44 Power| 32|\n", + "| Itel| A32F|537792|\n", + "| Itel|itel A32F| 67001|\n", + "| Itel| A23| 1493|\n", + "| Itel| S13| 19634|\n", + "+-----+---------+------+\n", + "only showing top 20 rows\n", + "\n", + "CPU times: user 8 ms, sys: 0 ns, total: 8 ms\n", + "Wall time: 10.8 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "# 按照品牌(brand)和机型(model)进行聚合\n", + "\n", + "brand_model_count = 
df.select('brand', 'partner', 'model').groupBy('brand','model').count()\n", + "\n", + "# 打印一下\n", + "brand_model_count.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 4 ms, sys: 8 ms, total: 12 ms\n", + "Wall time: 10.1 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "# 转换成pandas\n", + "\n", + "pd_df = brand_model_count.toPandas()\n", + "\n", + "# 查看前5条\n", + "pd_df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
brandmodelcount
Itel_S13ProItelS13Pro3
Itel_A52BItelA52B19
Itel_A44 PowerItelA44 Power32
Itel_S11XBItelS11XB137
Itel_S13 ProItelS13 Pro151
\n", + "
" + ], + "text/plain": [ + " brand model count\n", + "Itel_S13Pro Itel S13Pro 3\n", + "Itel_A52B Itel A52B 19\n", + "Itel_A44 Power Itel A44 Power 32\n", + "Itel_S11XB Itel S11XB 137\n", + "Itel_S13 Pro Itel S13 Pro 151" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 数据转换,排序处理下\n", + "\n", + "pd_df.index = pd_df['brand'] + '_' + pd_df['model']\n", + "pd_df = pd_df.sort_values(by = ['brand', 'count'])\n", + "\n", + "pd_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
count
Itel_S13Pro3
Itel_A52B19
Itel_A44 Power32
Itel_S11XB137
Itel_S13 Pro151
\n", + "
" + ], + "text/plain": [ + " count\n", + "Itel_S13Pro 3\n", + "Itel_A52B 19\n", + "Itel_A44 Power 32\n", + "Itel_S11XB 137\n", + "Itel_S13 Pro 151" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 只取count列\n", + "pd_df = pd_df[['count']]\n", + "\n", + "# 查看一下\n", + "pd_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA4MAAAGKCAYAAAC2B5QbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzs3XmYZFV5+PHvy7AKKNuIyqgzKoq4gDoCigYBxXEJoAKKBkdCIL+I4hINaIxo3HCJIgRJiGIkQYliIiOiiIILIsuArKJxBJRBNgcVDCLb+/vjnhpq2u6ehu57T83c7+d5+umqW9X1nqq6dbvee855T2QmkiRJkqR+WaN2AyRJkiRJ3TMZlCRJkqQeMhmUJEmSpB4yGZQkSZKkHjIZlCRJkqQeMhmUJEmSpB4yGZQkSZKkHjIZlCRJkqQeMhmUJEmSpB5as3YDZtpmm22Wc+fOrd0MSZIkSariwgsv/HVmzl7Z/Va7ZHDu3LksXry4djMkSZIkqYqI+MVU7ucwUUmSJEnqIZNBSZIkSeohk0FJkiRJ6qHVbs6gJEmSpP646667WLp0KXfccUftpnRu3XXXZc6cOay11loP6O9NBiVJkiStspYuXcqGG27I3LlziYjazelMZrJs2TKWLl3KvHnzHtBjOExUkiRJ0irrjjvuYNNNN+1VIggQEWy66abT6hE1GZQkSZK0SutbIjgw3edtMihJkiRJPeScQUmSJEmrjbmHfW1GH++aI14yo493fx155JEcdNBBPOhBD5rxx7ZnUJIkSZJG1JFHHsntt9/eymObDEqSJEnSNJxwwgk89alPZZtttmG//fbjmmuuYZddduGpT30qu+66K7/85S8BeN3rXsfJJ5+8/O822GADAL7zne/wvOc9j7322outttqK17zmNWQmRx11FL/61a/Yeeed2XnnnWe83Q4TlSRJknpkusMoaw+bHDVXXHEF73//+znnnHPYbLPNuOWWW1i4cOHyn+OPP55DDjmEr3zlK5M+zo9+9COuuOIKHvGIR7Djjjvygx/8gEMOOYSPf/zjnHXWWWy22WYz3nZ7BiVJkiTpATrzzDPZe++9lydrm2yyCT/84Q959atfDcB+++3H2WefvdLH2W677ZgzZw5rrLEG2267Lddcc02bzQZMBiVJkiSpE2uuuSb33nsvAPfeey933nnn8tvWWWed5ZdnzZrF3Xff3Xp7TAYlSZIk6QHaZZdd+NKXvsSyZcsAuOWWW3j2s5/NSSedBMCJJ57Ic5/7XADmzp3LhRdeCMCiRYu46667Vvr4G264IbfddlsrbXfOoCRJkqTVRtdzGp/0pCfx93//9+y0007MmjWLpz3taRx99NHsv//+fPSjH2X27Nl89rO
fBeDAAw9kjz32YJtttmHBggWsv/76K338gw46iAULFvCIRzyCs846a0bbHpk5ow9Y2/z583Px4sW1myFJkiSNpNWtgMyVV17JE5/4xNrNqGa85x8RF2bm/JX9rT2DkiRJUodWt2RMqy7nDEqSJElSD5kMSpIkSVqlrW5T36Zqus/bZFCSJEnSKmvddddl2bJlvUsIM5Nly5ax7rrrPuDHcM6gJEmSpFXWnDlzWLp0KTfffHPtpnRu3XXXZc6cOQ/4700GJUmSJK2y1lprLebNm1e7Gaskh4lKkiRJUg+ZDEqSJElSD5kMSpIkSVIPmQxKkiRJUg+ZDEqSJElSD5kMSpIkSVIPmQxKkiRJUg+ZDEqSJElSD00pGYyIayLisoi4OCIWl22bRMQZEfGz8nvjsj0i4qiIWBIRl0bE04ceZ2G5/88iYuHQ9meUx19S/jYmiyFJkiRJmp770zO4c2Zum5nzy/XDgG9n5pbAt8t1gBcBW5afg4BjoUnsgMOB7YHtgMOHkrtjgQOH/m7BSmJIkiRJkqZhOsNE9wA+Vy5/DthzaPsJ2TgX2CgiHg68EDgjM2/JzN8AZwALym0PzsxzMzOBE8Y81ngxJEmSJEnTMNVkMIFvRsSFEXFQ2bZ5Zl5fLt8AbF4ubwFcO/S3S8u2ybYvHWf7ZDEkSZIkSdOw5hTv95zMvC4iHgqcERE/Gb4xMzMicuabN7UYJUE9COBRj3pUm82QJEmSpNXClHoGM/O68vsm4H9o5vzdWIZ4Un7fVO5+HfDIoT+fU7ZNtn3OONuZJMbY9h2XmfMzc/7s2bOn8pQkSZIkqddWmgxGxPoRseHgMrAbcDmwCBhUBF0InFIuLwJeW6qK7gD8rgz1PB3YLSI2LoVjdgNOL7fdGhE7lCqirx3zWOPFkCRJkiRNw1SGiW4O/E9Z7WFN4POZ+Y2IuAD4YkQcAPwC2Kfc/zTgxcAS4HZgf4DMvCUi3gdcUO73j5l5S7n8euDfgfWAr5cfgCMmiCFJkiRJmoaVJoOZeRWwzTjblwG7jrM9gYMneKzjgePH2b4YePJUY0iSJEmSpmc6S0tIkiRJklZRJoOSJEmS1EMmg5IkSZLUQyaDkiRJktRDJoOSJEmS1EMmg5IkSZLUQyaDkiRJktRDJoOSJEmS1EMmg5IkSZLUQyaDkiRJktRDJoOSJEmS1EMmg5IkSZLUQyaDkiRJktRDJoOSJEmS1EMmg5IkSZLUQyaDkiRJktRDJoOSJEmS1EMmg5IkSZLUQyaDkiRJktRDJoOSJEmS1EMmg5IkSZLUQyaDkiRJktRDJoOSJEmS1EMmg5IkSZLUQyaDkiRJktRDJoOSJEmS1ENr1m6AJEmS1KW5h31tWn9/zREvmaGWSHXZMyhJkiRJPWQyKEmSJEk9ZDIoSZIkST1kMihJkiRJPWQyKEmSJEk9ZDIoSZIkST1kMihJkiRJPWQyKEmSJEk9ZDIoSZIkST1kMihJkiRJPWQyKEmSJEk9NOVkMCJmRcSPIuLUcn1eRJwXEUsi4r8iYu2yfZ1yfUm5fe7QY7yjbP9pRLxwaPuCsm1JRBw2tH3cGJIkSZKk6bk/PYNvAq4cuv5h4BOZ+TjgN8ABZfsBwG/K9k+U+xERWwOvAp4ELAA+VRLMWcAxwIuArYF9y30niyFJkiRJmoYpJYMRMQd4CfDpcj2AXYCTy10+B+xZLu9RrlNu37Xcfw/gpMz8Y2ZeDSwBtis/SzLzqsy8EzgJ2GMlMSRJkiRJ0zDVnsEjgb8D7i3XNwV+m5l3l+tLgS3K5S2AawHK7b8r91++fczfTLR9shiSJEmSpGlYaTIYES8FbsrMCztozwMSEQdFxOKIWHzzzTfXbo4kSZIkjbyp9AzuCOweEdfQDOHcBfgksFFErFnuMwe4rly+DngkQLn9IcCy4e1j/mai7csmibGCzDwuM+dn5vzZs2dP4SlJkiRJUr+tNBnMzHdk5pzMnEtTAObMzHwNcBawV7nbQuC
UcnlRuU65/czMzLL9VaXa6DxgS+B84AJgy1I5dO0SY1H5m4liSJIkSZKmYTrrDB4KvDUiltDM7/tM2f4ZYNOy/a3AYQCZeQXwReDHwDeAgzPznjIn8A3A6TTVSr9Y7jtZDEmSJEnSNKy58rvcJzO/A3ynXL6KphLo2PvcAew9wd9/APjAONtPA04bZ/u4MSRJkiRJ0zOdnkFJkiRJ0irKZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSemilyWBErBsR50fEJRFxRUS8t2yfFxHnRcSSiPiviFi7bF+nXF9Sbp879FjvKNt/GhEvHNq+oGxbEhGHDW0fN4YkSZIkaXqm0jP4R2CXzNwG2BZYEBE7AB8GPpGZjwN+AxxQ7n8A8Juy/RPlfkTE1sCrgCcBC4BPRcSsiJgFHAO8CNga2Lfcl0liSJIkSZKmYaXJYDZ+X66uVX4S2AU4uWz/HLBnubxHuU65fdeIiLL9pMz8Y2ZeDSwBtis/SzLzqsy8EzgJ2KP8zUQxJEmSJEnTMKU5g6UH72LgJuAM4OfAbzPz7nKXpcAW5fIWwLUA5fbfAZsObx/zNxNt33SSGJIkSZKkaZhSMpiZ92TmtsAcmp68rVpt1f0UEQdFxOKIWHzzzTfXbo4kSZIkjbz7VU00M38LnAU8C9goItYsN80BriuXrwMeCVBufwiwbHj7mL+ZaPuySWKMbddxmTk/M+fPnj37/jwlSZIkSeqlqVQTnR0RG5XL6wEvAK6kSQr3KndbCJxSLi8q1ym3n5mZWba/qlQbnQdsCZwPXABsWSqHrk1TZGZR+ZuJYkiSJEmSpmHNld+FhwOfK1U/1wC+mJmnRsSPgZMi4v3Aj4DPlPt/BviPiFgC3EKT3JGZV0TEF4EfA3cDB2fmPQAR8QbgdGAWcHxmXlEe69AJYkiSJEmSpmGlyWBmXgo8bZztV9HMHxy7/Q5g7wke6wPAB8bZfhpw2lRjSJIkSZKm537NGZQkSZIkrR5MBiVJkiSph0wGJUmSJKmHTAYlSZIkqYdMBiVJkiSph0wGJUmSJKmHTAYlSZIkqYdMBiVJkiSph0wGJUmSJKmHTAYlSZIkqYdMBiVJkiSph0wGJUmSJKmHTAYlSZIkqYdMBiVJkiSph0wGJUmSJKmHTAYlSZIkqYdMBiVJkiSph0wGJUmSJKmHTAYlSZIkqYdMBiVJkiSph0wGJUmSJKmHTAYlSZIkqYdMBiVJkiSph0wGJUmSJKmHTAYlSZIkqYdMBiVJkiSph0wGJUmSJKmHTAYlSZIkqYdMBiVJkiSph0wGJUmSJKmHTAYlSZIkqYdMBiVJkiSph0wGJUmSJKmHTAYlSZIkqYdMBiVJkiSph0wGJUmSJKmHTAYlSZIkqYdMBiVJkiSph1aaDEbEIyPirIj4cURcERFvKts3iYgzIuJn5ffGZXtExFERsSQiLo2Ipw891sJy/59FxMKh7c+IiMvK3xwVETFZDEmSJEnS9EylZ/Bu4G8zc2tgB+DgiNgaOAz4dmZuCXy7XAd4EbBl+TkIOBaaxA44HNge2A44fCi5OxY4cOjvFpTtE8WQJEmSJE3DSpPBzLw+My8ql28DrgS2APYAPlfu9jlgz3J5D+CEbJwLbBQRDwdeCJyRmbd
k5m+AM4AF5bYHZ+a5mZnACWMea7wYkiRJkqRpuF9zBiNiLvA04Dxg88y8vtx0A7B5ubwFcO3Qny0t2ybbvnSc7UwSQ5IkSZI0DVNOBiNiA+DLwJsz89bh20qPXs5w21YwWYyIOCgiFkfE4ptvvrnNZkiSJEnSamFKyWBErEWTCJ6Ymf9dNt9YhnhSft9Utl8HPHLoz+eUbZNtnzPO9slirCAzj8vM+Zk5f/bs2VN5SpIkSZLUa1OpJhrAZ4ArM/PjQzctAgYVQRcCpwxtf22pKroD8Lsy1PN0YLeI2LgUjtkNOL3cdmtE7FBivXbMY40XQ5IkSZI0DWtO4T47AvsBl0XExWXbO4EjgC9GxAHAL4B9ym2nAS8GlgC3A/sDZOYtEfE+4IJyv3/MzFvK5dcD/w6sB3y9/DBJDEmSJEnSNKw0GczMs4GY4OZdx7l/AgdP8FjHA8ePs30x8ORxti8bL4YkSZIkaXruVzVRSZIkSdLqwWRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknpopclgRBwfETdFxOVD2zaJiDMi4mfl98Zle0TEURGxJCIujYinD/3NwnL/n0XEwqHtz4iIy8rfHBURMVkMSZIkSdL0rTmF+/w78M/ACUPbDgO+nZlHRMRh5fqhwIuALcvP9sCxwPYRsQlwODAfSODCiFiUmb8p9zkQOA84DVgAfH2SGJIkSVqFzT3sa9P6+2uOeMkMtUTqt5X2DGbm94BbxmzeA/hcufw5YM+h7Sdk41xgo4h4OPBC4IzMvKUkgGcAC8ptD87MczMzaRLOPVcSQ5IkSZI0TQ90zuDmmXl9uXwDsHm5vAVw7dD9lpZtk21fOs72yWJIkiRJkqZp2gVkSo9ezkBbHnCMiDgoIhZHxOKbb765zaZIkiRJ0mrhgSaDN5YhnpTfN5Xt1wGPHLrfnLJtsu1zxtk+WYw/kZnHZeb8zJw/e/bsB/iUJEmSJKk/HmgyuAgYVARdCJwytP21paroDsDvylDP04HdImLjUhV0N+D0ctutEbFDqSL62jGPNV4MSZIkSdI0rbSaaER8AXgesFlELKWpCnoE8MWIOAD4BbBPuftpwIuBJcDtwP4AmXlLRLwPuKDc7x8zc1CU5vU0FUvXo6ki+vWyfaIYkiRJkqRpWmkymJn7TnDTruPcN4GDJ3ic44Hjx9m+GHjyONuXjRdDkiRJkjR90y4gI0mSJEla9ZgMSpIkSVIPmQxKkiRJUg+ZDEqSJElSD5kMSpIkSVIPmQxKkiRJUg+tdGkJSZIkSVpdzD3sa9N+jGuOeMkMtKQ+ewYlSZIkqYdMBiVJkiSphxwmKkmS1CPTHSK3ugyPk2QyKEmS1CmTMUmjwmRQkiT1ismYJDWcMyhJkiRJPWQyKEmSJEk95DBRSZIkSZ1xqPbosGdQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSeshkUJIkSZJ6yGRQkiRJknrIZFCSJEmSemjN2g2QJEn9Mvewr03r76854iUz1BJJ6jd7BiVJkiSph0wGJUmSJKmHTAYlSZIkqYecMyhJUs84Z0+
SBPYMSpIkSVIvmQxKkiRJUg85TFSSpA45RFOSNCpMBiWpZ2onI32PL0nSqBj5YaIRsSAifhoRSyLisNrtkSRJkqTVwUgngxExCzgGeBGwNbBvRGxdt1WSJEmStOob9WGi2wFLMvMqgIg4CdgD+HHVVknSNDhMUZIkjYJRTwa3AK4dur4U2L5SWyTNgNqJ0HTjz0QbJElSv9X+PjQQmTkjD9SGiNgLWJCZf1Wu7wdsn5lvGHO/g4CDytUnAD+dRtjNgF9P4++ny/h1449CG4zf7/ij0Abj9zv+KLTB+P2OPwptMH6/449CG6Yb/9GZOXtldxr1nsHrgEcOXZ9Ttq0gM48DjpuJgBGxODPnz8RjGX/Viz8KbTB+v+OPQhuM3+/4o9AG4/c7/ii0wfj9jj8Kbegq/kgXkAEuALaMiHkRsTbwKmBR5TZJkiRJ0ipvpHsGM/PuiHgDcDowCzg+M6+o3CxJkiRJWuWNdDIIkJmnAad1GHJGhpsaf5WND/XbYPx+x4f6bTB+v+ND/TYYv9/xoX4bjN/v+FC/DZ3EH+kCMpIkSZKkdoz6nEFJkiRJUgtMBiVJkiSph0wGey4iZkXEx2q3o7aI2DwiXlp+Htpx7MdGxDrl8vMi4pCI2KjLNkhSH0XEDrXbIEk1mQwWEbF2RDy5/KxVuz1dycx7gOfUbkdNEbEPcD6wN7APcF5E7NVhE74M3BMRj6OZLPxI4PMdxjchlVRNREz4XaSD49CnhmL9sOVYklYhEbFJ7TZ0wWSQ5ssv8DPgGJp/DP8bEX/WYfxHR8Rm5fIOEfG2iHhZV/GBH0XEoojYLyJePvjpInBErBsRCyNi92gcGhGnRsQnB69JB/4eeGZmLszM1wLbAf/QUWyAezPzbuBlwNGZ+Xbg4R3Gh8oJaURsFxHPLJe3joi3RsSLO4r9oIj4u4h4e9kfX1c+Dx+JiA06iL99RDy4XF4vIt4bEV+NiA9HxEPajj9Bm07oON5WEbHr2Nc7IhZ02Y6huM8p++BuHcas9hpExMMi4tiIOCYiNo2I90TEZRHxxYjo4li0OCK2H6ddfwVc1HLsGLq8bsuxpqysr/zyiNiqw5g198GIiH0iYu9yedeIOCoiXj/ZyYKW2/TBDmO9fOjyxl3FHacdI3UsLrG/3lGcHSPiyoi4ovxfPgO4ICKujYhnddGGoXacERH/GxFXRcTVEXFVqzGtJgoRcSHw6sz8abn+eOALmfmMDmL/A/A6IIGTgOcD3wG2By7JzDd30IbPjrM5M/MvO4j9ReAuYH1gY+By4Ks0vZXbZuZLO2jDZZn5lKHra9C89k+Z5M9mMv55wJE0SemfZ+bVEXF5Zj65i/ilDRdl5tMj4u3AHZl5dET8KDOf1kHsw4EX0Sx1cwbNvn8W8ALg9Mz8QMvxvwhcC6wHPAG4EvgvYHfgYZm5X8vxrwC2KeuqHgfcDpwM7Fq2t3piJiIWjd0E7AycCZCZu7cc/xDgYJrXfVvgTZl5Srntosx8epvxS5zzM3O7cvnA0p7/AXYDvpqZR7Qcv+prEBHfAL5Gcxx+NXAizcmgPYHnZ+YeLcd/Ds3J2POBQ4FH05yYXQq8JTOXthj7EuB5NCfHzyyXlyeImXlLW7HHtOMrmblnubwHzf+E7wDPBj6Umf/ecvza++CngIcCawO3AusAi4CXADdm5ptajn/U2E3AfsAJAJl5SMvxl7/GXR33xmlDtX0gIiZ67ABOzczWT0pFxPnAAcAGNN9D98zMs0vbjs7MHdtuQ2nHT4C3ABcC9wy2Z+aytmKO/DqDHVlrkAgCZOb/RndDRfcFngg8CPglzZfP2yNiTeDiLhqQmft3EWcCW2fmk8vzXZqZO5Xt3yj/pLvwjYg4HfhCuf5Kul3bcn/g/wEfKIngPOA/OowPcFdE7AssBP68bOvqM7AXzT+edYAbgDmZeWs0c1nPA1pNBoHHZ+Y+ERHA9TRffjMizga62AfXKD3DAPOH/uGeHRFdHAPmAD8
GPk1zUiqA+cA/dRAb4EDgGZn5+4iYC5wcEXMz85Os2GvTpuF9/SDgBZl5c9kHzwVaTQap/xpsnplHA0TE6zPzw2X70RFxQNvByxeuZwDvAX4O/B44IDO/2XZs4CE0X7oGr/NwT2QCj+mgDdAkwAOHAruU/webAd8G/r3l+LX3wedm5lPKd68bgIdn5p0R8QXa7x2GZmTOd4Fvct/zfRXNvtGFmOByl2ruAxfQvP7jxelqyspamXkZQETcnJlnA2TmRRGxXkdtAPhdZnbSGzpgMthYHBGfBv6zXH8NsLij2Hdk5p3AnRHx88y8HaD0EtzZRQNKT+ixNF8InhwRTwV2z8z3dxD+Tlj+fH815rZ7xrn/jMvMt5chGoO5k8dl5v90EbvE/zFwyND1q4EPT/wXraiZkN5d5q7eXj4DtwJk5h8i4t6O2kBJAE/LMlyiXO9i6MTlEbF/Zn4WuCQi5mfm4vK5vKuD+POBN9H0TL89My+OiD9k5nc7iA1NMvx7gMy8Jpph+ydHxKPp7kvRGmVo1ho0I2ZuLu35v4i4e/I/nZn4lV+D4WF4Y4cIdzVEby+ak6PH0owKeGVELG67Zy4z57b5+PfD8LFmzfJ/gMz8dUfHwdr74N0l9l0RcUH5XjT4btDF898aeB+wAHhbZv4qIg7PzM91EBtgvYh4Gs3nbd1yebiHuouEuOY+cCXw15n5s7E3RMS1LcceGD7WvWPMbWt31AaAsyLio8B/A38cbGxzHzAZbPwNTdf44Av59xmaVN6yjUoiEsCDh8aNB80Zyy78G/B24F8BMvPSiPg80EUyOKcMz4ihy5TrW7QdPCJmAd/KzJ1pPnidi4irWfGLAACZ2dUZ6doJ6Z0R8aByImT50Oxo5st18SVgcURskJm/Hx4aHRGPBW7rIP5fAZ+MiHcBvwZ+WP75XVtua1Vm3gt8IiK+VH7fSLf/G26MiG0z8+LSnt9HxEuB44FOhmqzYu9QRsTDM/P6aObNdPFFuPZrcMrQZ+Bdg43RzCH+37aDR8S3gDtoeuWvLp+Fg2nm63w4M4/roA1rZeZdY7Ztlpm/bjt2sU1E3Eqzv60ztA+uDczqIH7tffCGoX1w+fy0iHgY5aRxmzLzNuDNpYf6xIj4Gt3W1bge+Hi5fMPQZWi+H+zSQRtq7gPvYeLX+40txx74h8F3kcz8ymBj+S7Q5Tz6wfzp+UPbWt0Hej9nsCQDJ2TmayrFH2++3nJdDOEsZ+GeGUNzxCLi4szctoPYCye7vYuzchHxbeDlmfm7tmNNEH/Toavr0lQ13SQz391hG6olpBGxTmb+cZztm9EMFbqs7TZMJCIiOzpIRlNEZh5NIrY0M2/sIu447XgJsGNmvrOjeHNoeodvGOe2HTPzB120YzwR8SCaERNXtxxnZF+DLkTEy8YbjVESgX9q8/9zROxMMwpiXZrhiH89eL+j0tytMe3bCHhiZrZa6XRU98GIWB9YPzNv6jBmAK8HnpWZf9FV3NpGdR9Q+3qfDAKUuUG7DIYl9E00lZreAHwpmyIie9HM13hRB7HXKD0T4922UWb+toM2nAI8jaZ4yf8NtmfLE8ZX0qYLs4MCRkPxqiekNUXEdjQjQy+IiK1phgr9JDO7nDs6Xrs2GAzb6Uv80hu1DXBl6bGuqtZ7EBGbtD1EcortGAxhXi1FxAXA6zLzivK/70PAfpl5bnRURGuStu2emWMLPPVORGx1WUydAAAgAElEQVSVmT+pELfT17+cEJydmT8fs/2pmXlpR21YA5oRI6VX+snANTWPRaNwDIqI4zLzoI5i/Zxmrvr3ge9n5hWtxzQZhGjKqD+RpnLVcDLw8Qn/aGbjb0UzJPK84S8dEbEgM7/RQfzH0Cwn8GzgN8DVwGsy8xcdxL4I+JvMPG/M9r8C3tlRz9S4vZNdzRWIFatorUEzNOBvMnObLuJPpKuENJo5qsfRfAa+Dhyamb8pty2v8thi/KrVTFfStl9m5qNW5/gRcRawd5k
btR/Nsi7fo3kfjstS2KSWjl6Dd2WZo11ORnyFpqhNAK8ce3zs0gjsg19v88RkRFwyfKyNiCfRTBk4FHh3Vz2D8afLOQVNhdXXA2Rmq9MYIuIpNFNGqhyHV9K2Lj6D41Vt/hTdvf770FSQvYnms/+6zLyg3NZVVeU9aaYL3UtTQ+CdNMWcnkDzneSrbbdhgnZ1cgyKidcUDJoK83PabkNpxzo0//+eC+xI8/pfmpmtLTnnnMHGz8vPGsCGXQaOFUv5fiYilpfyBT4ItJ4MZuZVwPPLcIw1ytj5rhwCHBdNSd+xJcVbX+sxIralOQFwRWZe2Xa8CQxXbbwbuAbYp8sGTJCQdnV8+BTNfIFzaebInV3OyP6cbiqaVq1mGhFvnegmmhLXraodn+ZM+GBe1iE0Q7OWlSGa5wKtJ4Mj8Bq8nPvmaH+UpqT710uP9ZE0J+paExET9ToEsHmbsUv8ycrKtz1d4a6IeNhgaFzpIdwVOBV4bMuxh/0XcDpNMjCYp7o+TXXnpP057cdS8Tgcf7q0w/Kb6KaaZO3X/500lTyvL5/7/4iId5Th010V0jqcZlTGejSVtJ+ZmT+NpoDMl2mWW2hF7WNQcTPwC1Z8vQcVth/aURugKZ54V/l9L80+2eow6d4ngxExm2Z9pSVdDEkcR+1yzn/SJQ203iU9kBVLikfEu4G/oCkc8ZGI+FBm/lvbccfKpnhNbTUT0g2HesA/Fs26n98ovURdDF2oXc30gzQJwHhVK7soYFA7/l0RsUVmXkfz+R+Mzvgj3RTOgPqvwbBHZCkrnpnnRzclzTcHXkgzMmRYAOd0EL9mWfnDaJ7/8nlSmbk0mkqKB7cce9izaZYwuSAzjwWIiOd1UTegqH0c3h/4W4aqJw7Zt4P4tV//WZl5PSz/3O8MnBoRj6Sb158S+wZY3hv307LtF4Phoy2qfQwCuArYNTN/OfaG6K6iKTTrbF5GU0To37LF9QUHep0MlqGIH6RJQuZFxEEVxufXLucMTUnlQZf0RyOi9S7pMaqUFKdZT3DbbNZ13JSmF7azZHCS3gigu2HKJVbVhDQiHpKlgE9mnhURr6A5EznRsI2ZVLua6UXAVzLzT9azKseo1T3+W4BvRsSXaU5EnRnNup/PAbqaJ1L7NXhMRCyC5VWVB/sjdNM7fiqwQZYqgsMi4jsdxK9WVj4zvzXB9t/S/hqnw/EuiIgXAG8sQ6cPpcMkAKofhy8ALs/MP/niHxHvaTv4CLz+t0XEYwfzBUsP4fNohow/qatGxH11HIYra8+i/aUVah+DoBmFsTHNmt9jfaSjNkDzffg5NEOU/yoizgG+l5nfbitgr+cMRsTlwM7ZLC78GODEzHxWx204E3jr8AcgmgXYj6eZt9f6mfES75nATjQ74KY0yeBfdxB7UFL8jdmUFF+D5mzsm4FWS4qPHYff1Ry5oXiHT3Z7Zr63gzZUT0gj4tXAVZl57pjtjwL+ITMPbDl+1Wqm5eTLshynhH1EbJ4tVxWtHb/EeQjwauDxlGqqwCnZUdGI2q9BROw0ZtOFZbTI5sBemXlMm/Fri6Zwy2WDnogxt+2ZQ2XeW4i9FfAJmhM/h9DMWd2TZkmNhTWmD0TEI2i+mM7PjpYYGoHj8CY06y7fvtI7t6zS678NcPvYEyIRsRawT2ae2EEbnknzObxjzPa5wHMy8z/H+zu1oxybXkTzffihmdnaKJG+J4Njk4HOy0jHCJTyjYjbua9L+ltddEkPxa5ZUvy3NIUqoDkj/9yh62Tm7m3FHtOOLteyGhu7ekI6kYhYF/jzzPxSy3EeBNyVZY2xkhi8GPhFtlw0YCXt6uT5G39029D3z0AXIuJ7NEOEN6AZJngozfyxlwJvzsxdKzavN/tgibUt8DjqzuFfQc3XPypUFK75HsQIVDKN0ajo+mWauZs/577pW+eNTdJnNGbPk8GbgJOGNr1q+HpWWlqg6wNAROxB0yO4Hc3irq13SY+Ccc7GryAzv9ty/Jf
SDIO7i+as9D7jDZHpQs2EdEw7ZtHMG9gX2I2mrPJeLcf8Hs081Z9Fs6zB+cCJNMOnz8/Md7QZf0xbOn/+xh+tNozgZ+CCzDyszfgraVurZeVjxfV1l2Tm44Zuq7LOYE/3weE5/NsDVebwl7bUeP47Ap+m+S7wlzQFpR5DMzxzn2x5ncnShmrvQYxAJdMYgYquJdZ84EfZ1DLoRmb29gdYONlPR21419DlrWmGplxNU8Bj+45fj61o5u/8AvjDCLw/X6/dhpaf36XAVuXy9sB3K7ThpTQVtH5FMzTv2ZVei51o/hFcSzNH5QbgQR3Fvmzo8vuAY8rltYdvW12fv/FHow19/wxM0rZftvz4lw5dfv2Y2y7v+Ln2eR+8YhCLZqrKBV2+9iPw/M8HngI8C/g1zbBMgKcDP1jd3wPgR8DDgHk0BVSeULY/GljcURsuppkaAk3nyE+Alw3a1+FrsRbNkPWTy88bgbXajNnrAjLZ0TpyK1G1pDiM2yX9WpqS+q2LuiXFa7s7y5yozDwvIjpd1qT4IPDczPxJRGxPM0l60h7TmRYRS2kmbB8LvC0zb4uIq7O7uSPDwyN2ofkckpl3RgfVRGs//77HH4U21I5P/c9AzbLyx0TEBpn5+8z81FCbHgeMW1ymDbX3gdrxgT8OYmWztEynVXxH4PmvlWV+ekTcnJlnA2TmRdFNRWGo/B5kvUqmAyNR0ZVmH1yLZtktgP3KttaKmfU6GSzz0t5N8ya/myb7fjnN2YA3DXaKDtUoKQ7wIbrukr5PzZLitT10TAGXFa5nN9VERyEhPZmmYMMrgXsi4hS6PfBeGs2agtfRzJX4JkBEdLX/1X7+fY8/Cm2oHb/2Z6BaWfnM/NcJti+JiNaLdgypvQ/Ujj+oqAvN+/7Yoetk+3P4az//4YRn7NSEtit5DlR9D6JeJdOBkajoSrO+4zZD18+MiEvaDNj3OYPfoFljcH2aSnYnAp+nOSA8PzP36KANgyImAewAPHpwZiYiLs/MJ3fQhrWAv+G+Rd6/C/xLlmICLce+nKYbftyS4pn5yLbbUMsoFG8pZ0OHk863Dl/vKCElIgJ4Hs0cjRcDDwEOAE7LsvRKi7HXA94EPBw4PjMvKdufDTw2M/+jzfglVrXnb/zRaEOfPwMR8Rngs4PekDG3fT4zX91m/DHxtqZ5D/YFfpuZ8zuM3ed9sOoc/tKGms9/d5oCfreP2f5Y4BWZ2frSBjXfgxiBSqYxAhVdS7yLgL0HSWk0qx2cnC3OWex7Mjg8cfyXmfmoodsuzszWhymO8+HrvKR4RHyapkt6MGx2P+CezGx9fa2oW1L8YcDhNBOWBz3Dr6BZ86pGz3DnRiEhHasceAeT91+YmZt13YbxRMSXM/MVHcSp+vz7Hn8U2lA7/kS6+gx0rXzhHCSAd9HMU5qfmddUbJP74Dj6chweakf1qspjtfEeRMSjcpzF3rsUES/MzNMnuG3vrt6DiNiVprjgVTQdRY8G9s/Ms1qL2fNk8JJBV2xEvD8z3zV026WZ+dSKbeuynPPy12GybaubUegZ1uQiYr3M/EO5XPWL6PDJow5jVn3+fY8/Cm2oHX9MW1r5DETFpS0i4ofAg2kqiZ+UTUXVqzNzXptx74/a+0Dt+GPastofh2MEqipPpo33IIaqdVY81t9DM1LvLzLzuona11Fb1qGppArw0xxnLeSZ1Onk0BF0SkRsADAmEXwcTVXPTkXErIh4cUT8B01Fz1d2FPqeMhRh0I7HADXmD64gIvZvOcTmmXl0Zh4BbJSZH87MazPzaJozMaps8A+46GTx30l0fuas9vPve/xRaEPt+GO09Rn4BjAXlv///SHNcz04Io5oKebAjcCGNPMWZ5dtI3WWvPY+UDv+GKvtcTgidoqIf6WpJn8A8AJg3iglgkUb78Fw3Yha+9ilNB0C55ZRa8PGq2vRpmfQrLO4LfDKiHhtm8F6XUAmM989wfYlQJdr+uxE0zP1YprywjvSHAC
6qmL1duCsiFihS7qj2JN5L01XeVuGT4acMMltGg0j9QWtgtrPv+/xoX4basdvy8ZD83QWAl/IzDdGs/D0hUBr6xxm5p4R8RCa4nHviYgtgY0iYrvMPL+tuNNQex+oHb+2Vp5/1K9mWltOcLnTNmTmv0XEd4ETI+IlwMHlPeisTaVD6LE0S10MOmaSP/2eOmN6nQxOJlpe6HYoTvUDQGZ+u/wD7KxLeiDqlhQ/Je4rKd55z3CZrL43zYf8ZJqS7nvQVLP9l2yqaml0dH1mUBo1bX0Gqi5tkZm/oznx+NmIeCiwD/CJMo9ptS1itopaXY/DtauZ3h9tvAfbRMSt5bHXK5cHsTIzH9xCzHFl5v9GxLNoln37Udu9cuOYD2ydHc7jMxmcWNu9UgPVDgAlAfwYzRmIy2iS0esm/6sZV7OkeO2e4WOAh9KUTd4DWAdYBLyEJjF/U9sNiGZtwSsz89ZoKgoeRrPI7Y+BD5YvSaOi9peAQyvHr/38+x4f6rehdvy2PgO1l7ZYLjNvAv4Z+OeIGMXpArX3gdrxV8vjcGa+OSLewn3VTD8CPCQi9qHDqspTNOPvQWbOmsr9ImLjzBz7fXGmLH9vM/Nu4LBSW+IL3DeEvAuXAw8DOiti2PcCMpP1Sj0+M9fpqB1VyhlHxPdpup2/B+wOPCszX95WvAnaMDIlxcfEbr1nOCIuy8ynlKplNwAPL2fC1wQu6qKAUURcAWyTmXdHxHHA7TQnKHYt2zvdHyYTEbtl5jdbeNzLGP8EzOCMZLVCUsPaev7GX3XasLp+BmIElndZVbgP1tXV61+jmumq8B60WcglJqhgHxEbA39d6ku0nZASEWfRzBU8H1g+Si9bXOex78ngjUzSK5WZj6jQps4OADFm+YyuqyWNshiz1EhLMYaXNvlGZi4Yuq2rpU2uzMwnlssrvP8dtqH2F9FJz/5n5i9ajl/7+fc6/ii0YQTiV/0MTFXtSpZtGoF9oHb8Xh+HJ9NVNdPa78FU1KgmO04bWv2uHBOs95gtrvPY92GipwIbZObFY2+IiO903xzIprT2qcCp5WzpoD1tHADWjYincV/X+HrD1zPzohmO9yeibknxmvMVAW4YmrM4nAg+DLizg/gAlw/1gl4SEfMzc3FEPJ5mva0uvLSjOOMa/gdX/hlumZnfKp+/Lo6RVZ+/8YH6bej7Z2CqaleybJP7YNHT4/CEsqNqpiPwHkzFKPRgtT1U+kGZ+fUVAkb8P6C1ZLDXPYOrkjbOhpSu6IlkZu4yk/EmaMP3gAOyWdvpcTTd4icCWwMXZGZrVeRGsWcYICLWB9Yvc1fajvUQ4JPAc4Ff08wXvLb8HDIYrtWV8f4BZeZtHcU+EDgI2CQzH1vm1P5LZu7aRfzShmrP3/ij0Ya+fwYm08YZ+Yg4mkm+YGbmITMZbyrcB/t9HJ5MFyO4RuE9mMgojGDroGfwHOBdmXlmuf53wM6Z+aK2Yo5Kpl9FzV6pB2DGs/bM3HmmH/MBqFZSnBHsGQbIzP+LiEcCrSeD2RSIeV1EPBiYR3NMWJqZN7Yde6zhf0A0RY3mAP9CM3+xCwcD2wHnAZQTFA/tKHb159/3+KPQhtrxqfwZqGRx7QYMq70P1I5Pz4/DI2KUjwO1Cxh1YXea0YFvBxYAW9EUGWxNr5NBmoVuDwAGvVI/pOmVemk0awy1mYioUa2keGYeMMltVQrXDPkm0OqcxWGZeSuwQi/gYAhrV22g/j+gP5b9DoBoCvl0OXSi9vPve/xRaEPt+LU/Aysz418EM/NzKwSIeFDWXdut9j5QO37tfbD281+ZLpKhqu9BRDyHpmf2sxExm+ak/dXl5lFIylt9DzLz1xGxO/Atmk6RvbLlYZx9TwZr9krdX6vr2ZBqJcVr9wxHxFET3QR0XlJ9HD+mw4SU+l8CvhsR76SZO/sC4PXAVzuMX/v59z3+KLShdvzan4GVaW1ZgWjWFfsMsAHwqIjYhqaC4OvbijmB2vtA7fi198H
az39lulhao9p7EBGH06yz9wSa5d3WAv4T2BEgM2/pqB2dJ6QRcRsr7mtr08wR3SsiMltca7HvyWDVhW7vp9pr67TlQJqS4nOB3YbOyG5NswZim2r3DO8P/C1DpYOH7NtybAAi4q0T3UTzpahLtb8EHEazP1wG/DXN0i7/1mH82s+/7/FHoQ2141f5DMQUKzlmuyX9j6SZQ76oxLokIv6sxXgTqb0P1I7fy+PwiHwGBmq+By8DngZcBJCZv4qIDTuKDdRLSDOz0+c5rNcFZCLiP2nWd7uOZuefl5m3l16p72bmNh20YaTKGUfEBzPznV3GnIpooZpqlHX+yuX30UyWPnjQMzy4rS0RcSbNJOFzxrnt6syc12b8EucOmpMgd49z81sys7MeyohYg+Yf0G40+//pXX4JiIg3ZeYnV7atxfi1n3+v449CG0YgfpXPQIxASfuIOC8zt48Vl/y5pIvvAWPaUXsfqB2/l8fhUfgMDLWl2nsQEedn5nZRirREU1Dvh11+F46IiykJ6dCx4NKO27AF8GiGOu0y83utBczM3v4A69EkgZ+kWWB7sP3ZwH4dteHRk/20HPuoMT9HA78dXK/9/oxp649aeMxLhy7/ANhz6PolHTynTWhKCNd8Xc8BnjHBbdd23JY3TWVbi/EvGmfbjO93I/z8ex1/FNowAvGrfgZKvEcDzy+X1wM27CjuyeV//0U0PQFvA07q8rmPyD5QO36vj8MlXpXPwCi8B+Vz96/AVTQjx34IvLHj53/+8OsArD/8fbGD+B8GrgFOo+mV/iqwqM2Yve4ZnKo2eqUmiNNpOeOIuJZm3ZJvct+cxI/RfBjJMRPra4p2SoqPQs/wtjRzJa/IzCvbjjdO/CcAyzLz1+Pctnl2WFV0vPc4OlhgNiL2BV4NPAf4/tBNGwL3ZkfltGs9f+OPThv8DNQraR8Rm9GcGH4+zf/Db9Isr9PJHKWhdrgP9vs4XPMzMCrvwQtYsWf2jC7iDsV/G7Al8ALgQ8BfAp/PzKM7iv9T4KmZOd4Uolb0fc7gVLW+0G3UKWe8NfA+mtK1b8tmbPbho5QEtqzmfEUi4t3AX9AUK/pIRHwoOx4Wl5k/naBt6wJ/Bnyp7TYM/QOaFxGLhm7aEOjii9g5wPXAZsA/DW2/Dbi07eC1n3/f449CG2rHp/JnYEjNSo5PyMzXDG+IiB1pRo20rvY+UDs+PT8OD6n5Gah+HIiIecD3BwlgRKwXEXMz85ou4gNk5sdKQnorzbzBd3eckF5FMzrBZHDEdNF92vkBoPQ6vjkingGcGBFfA9ZoM+Y0tFFS/A/AEeNsP4fmoNgEbq9n+JXAtqU3clOagjadJoPDImIWTQGFfWnOyn2fDpJBKv8DymYuxi+AZ7UdawK1/wH3Pf4otKHvn4GBmpUcjwbGjj4Zb1tb3Af7fRweqPYZGIH3AJrvHM8eun5P2fbMrhowAgnp7cDFEfFthhLCzDykrYAmg6Oj5gHgwojYhaZq1tldxHwAalZTbatn+I+D3sjMXFYmrncuInaiOSP6YuB8mopZ87KjtbZq/wOKPy3nvPwmmiJOrZVzhvrPv+/xR6ENtePX/gwM+W50XMkxmiUlng3MjhWrKz8YmNVm7GG194Ha8Wvvg7Wf/5DOPwMDtd+DYs3MvHNwpXwvXruDuMNqJ6SLyk9nTAanpos1/qodAKD5lAPHlHmEnYnRKqc8kbaS8scMDUcJ4LHDw1Myc/eW4i4XEUuBXwLH0gwVvi2aSqadLbpc+x9QViznDPWff9/jj0Ibasev/RkYUqOk/do0y+isSTMkcOBWYK+WYy9Xex+oHb/2Plj7+Q+ptqxD7feguDkids/MRQARsQfwJzUNWlY1Ia0xVcsCMlMQEbu1nYxEhXLGEfHycTZ/iiYRJbtZeH1kyilPZLwJ5TP0uDtNdntmfnemY47ThiOBPYHLgc8DpwCXZWbr82QlaVjULWn/6FH4f6N+q/kZGAUR8Via9Z4fQfN
d+FrgtZm5pMM2nAEcPSYhPaTtAjrjdI4kTSJ8FvCxzLyjtdh9Tgan2ivVUVs6PwBExF3A6cBN3Nf7uRdNie3MzL9sK/YE7em0mur9aFenFQ3Hid9qNdtoxiY/j2au4IuBh9CcmDgtM3/fVlxJGjbeibe2j78RcWRmvjkivso43we6GKEhDdT4DIyiiNgAoMZ3kFoJ6QSdI5sAC4H1M/PA1mL3PBkcmV6pSv8En0lTQOXkzDy2bLs6O1jsfJy2VCunvDJd9AyvJH6XZa3X4r4iMi/MzM26iCupv6JiSfuIeEaZNz/uSI0uRmhINT8DoyAi/iIz/3PMvN3lMvPjFdpULSEdpy2tfg/s9ZzB4WRvvF6pLtpQs5xxZl5Q5ie+MSLOoinSUuvsQOfVVFeR+YrQ4XuSmXcBpwKnls8B0H7vpKReq1bJMTMvLL8nTfo8Bqplo1LNtJb1y+9q8xYnSkgHhR1rJKRDWi0w2OtkcCDqrPE3ULuc873AJyPiS8CRbcebRI1qqi9t+fFXadksvTHgHEJJrRihSo6T8Rio1qwin4HWZOa/lt/vrdiMqglpRIxXm2JjmvWov9dq7D4PEx2IiIspvVKDbtiIuCwzn1K3ZXVEs+D4n2dmF2vMDWJ+BPgt8FrgjTRFbH6cmX/fUfyRnK8IozFfoK0iOpI0QpUcJ+QxUG1aFT4DXYiIxwCfBHageT1+CLwlM6+q2rAOlBF6wxJYBnwHOK6M3GqFPYONamv8jcoBIOotOD5QrZxy5Z7hqai5xqIktWpEStpL1fgZWO7zwDHAy8r1VwFfALbvqgG1EtLM3Hkq94uIhTO9/IQ9g9Tvlaopxl9w/DHZ4TpzpR01S4pX6RkepWq2KzMKvZOSVIvHQKl9EXHp2O8+EXFJZm7TYRvOpUlIv1A2vQp4Y2Z2lpBOpo1RCiaD1FnjbxSMWXD8K3nfguM1qolWK6ccEedl5vaDeKVn+KK2k7FRqma7MrUrqkpSTR4DpfZFxIeB3wAn0ZwsfyXNvLmPAmRmq4UVSxuqJ6STaeO7sckg/V3kcxQWHB+Fcsqj0DNca87iqtQ7KUkzzWOgNDoi4upycfCZjKGbs4vvp6OQkE7GnsGW9HmRz9oLjpckaB7wIZp5gwO3AZdm5t0dtKFqz3DNNRZXpd5JSZppHgOl+sq619dm5g3l+kLgFcA1wHu6TMBGISGdjD2DM2wUeqVGSV8XHK/dMzwq1WxHuaKqJLXNY6BUR0RcBDw/M2+JiD+j6ZV7I7At8MTM3KuDNoxMQjqZiPjnzHzDjD5mz5PB6r1Soyoi1husM9fmYrujUE21ds9wrTmLY9pQrXdSkmrzGCjVMzwnLyKOAW7OzPeU6xdn5rYdtKF6QlrasQ5NEjqXoVUfMvMf24rZ66UlsueLfE4mO1pwvGY55aGe4XkRsWjopg2BLs8AfTci3gmsFxEvoJmz+NUO4wMcTOmdBMjMn0XEQztugyTV4jFQqmdWRKxZOmF2pTkxM9BVrjJrqPfvlTRr+30Z+HIZwdWVU4DfARcCf+wiYK+TwVHolVpFrK7dx+cA1wObAf80tP024NIO21FtjcUh1dbalKQR4DFQqucLNCfGfw38gTJ1KyIeR5MYdWEUElKAOZm5oMN4/U4GXeSz30aoZ/iNZX7i8gSwQjXbUeidlKRaPAZKlWTmByLi28DDgW/mfXPY1qAZqtmFUUhIAc6JiKdk5mVdBez1nEFNzepaWXVUeoZrz1ks8Xq51qYkgcdASRARO3BfQvp/ZdvjgQ0y86KO2vBj4HHA1TTDRFtf5sZkUCvlYrvtGKVqtrUrqkpSTR4DJY2CiZa7aXOZG5PBHnOx3bpGqZrtKPROSlItHgMljYKIeB/wPeCcQe9k6zFNBvvLxXY1Sr2TktQ1j4GSRklE7A88l6aexW00x6XvZeYprcU0GRS42G4NozBncZR6JyWpax4DJY2iiHgYsA/wNmDjNotemgzKxXYlSZKkyiLi08DWwI0
0vYJnAxe1eWKq10tLaDkX2+2pUeidlKRaPAZKGjGbArOA3wK3AL9ue4SCyaDAxXZ7y7U2JfWZx0BJoyQzXwYQEU8EXgicFRGzMnNOWzFNBgUutitJkiRVFREvpSkg82fARsCZrFjcauZjOmdQLrYrSZIk1RUR/0yT/H0/M3/VSUyTQbnYriRJklRfRGwOPLNcPT8zb2oz3hptPrhWGQvH2fa6rhshSZIk9VVE7A2cD+xNs7TEeRGxV6sx7RnsLxfblSRJkkZDRFwCvGDQGxgRs4FvZeY2bcW0gEy/nQNcD2wG/NPQ9tuAS6u0SJIkSeqnNcYMC11GyyM57RmUJEmSpMoi4qPAU4EvlE2vBC7NzENbi2ky2F8utitJkiSNjoh4BbBjufr9zPyfVuOZDEqSJElS/1hNVJIkSZIqi4iXR8TPIuJ3EXFrRNwWEbe2GtOeQUmSJEmqKyKWAH+emVd2FdOeQUmSJEmq78YuE0GwZ1CSJEmSqouITwIPA74C/HGwPTP/u62YrjMoSZIkSfU9GLgd2G1oWwKtJYP2DOr/t3fHqlFFURRA91HskiY/ELSzVTsr9QvsLMRPEPQLrMTe35A0QQIi2FiqjY0IEisbScTCwkKOxcwzkMLu5gZmLXjMm1vMrjfnnTcAAMBkVUCuFdQAAAGISURBVLXT3cenzi539+GoTDuDAAAA8+1X1b//+a6qq0n2RwYqgwAAAPM9zaoQblXV9SQvktwfGWhnEAAAYLLufllVl5K8SrKd5G53fx6ZaWcQAABgkqp6ntWLYhZ3knxJ8jVJuvvhqGyTQQAAgHnenfr+/qyCTQYBAAA2kMkgAADAZFV1M8mTJLtZ9bRK0t19ZVimySAAAMBcVfUpyaOsHhP9s5x399GoTJNBAACA+X5298FZBpoMAgAATFZVz5JcTLKX5Pdy3t0fhmUqgwAAAHNV1Zv17VLQlp3B28MylUEAAIA5qurxcrv+7CTfk7zt7sOR2RdG/jgAAAD/tb2+ttbXdpIbSQ6q6t7IYJNBAACAc6aqdpK87u5rozJMBgEAAM6Z7j7OyaOjQyiDAAAA50xV3UryY2SG/xkEAACYpKo+5uQNooudJN+SPBiabWcQAABgjqraPXXUSY66+9fwbGUQAABg89gZBAAA2EDKIAAAwAZSBgEAADaQMggAALCBlEEAAIAN9BfSu8u/imrjEgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# 图表显示下\n", + "\n", + "pd_df.plot(kind='bar', figsize=(15, 5))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 例2. 按国家查看访问量" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 8 ms, sys: 0 ns, total: 8 ms\n", + "Wall time: 62.6 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "# 按照country_code 进行聚合\n", + "\n", + "country_code_count = df.select('country_code').groupBy('country_code').count()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 4 ms, sys: 0 ns, total: 4 ms\n", + "Wall time: 7.5 s\n" + ] + }, + { + "data": { + "text/plain": [ + "[Row(country_code='DZ', count=1027),\n", + " Row(country_code='LT', count=12),\n", + " Row(country_code='MM', count=18),\n", + " Row(country_code='CI', count=95814),\n", + " Row(country_code='SC', count=8)]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# 显示前5条数据\n", + "\n", + "country_code_count.limit(5).collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
country_codecount
0DZ1027
1LT12
2MM18
3CI95814
4SC8
\n", + "
" + ], + "text/plain": [ + " country_code count\n", + "0 DZ 1027\n", + "1 LT 12\n", + "2 MM 18\n", + "3 CI 95814\n", + "4 SC 8" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 转换成pandas\n", + "\n", + "codePandas = country_code_count.toPandas()\n", + "\n", + "codePandas.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
country_codecount
74IN440134
139NG432983
58Unknown309625
39BD192363
146VN96927
3CI95814
130MA69185
33GH49175
77CM44962
115SN42320
\n", + "
" + ], + "text/plain": [ + " country_code count\n", + "74 IN 440134\n", + "139 NG 432983\n", + "58 Unknown 309625\n", + "39 BD 192363\n", + "146 VN 96927\n", + "3 CI 95814\n", + "130 MA 69185\n", + "33 GH 49175\n", + "77 CM 44962\n", + "115 SN 42320" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 按照访问量排序下\n", + "\n", + "codePandas = codePandas.sort_values(by='count', ascending=False)\n", + "\n", + "codePandas.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "550385a639cf43a1824e9ab6bcac42d3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HTML(value=''.format(src))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 例3. 每小时访问量走势图" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 4 ms, sys: 0 ns, total: 4 ms\n", + "Wall time: 387 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "# 增加3个栏位, timestamp, hour, month\n", + "\n", + "df = df.withColumn('timestamp', df.events[0].timestamp)\n", + "df = df.withColumn('hour', from_unixtime(df.events[0].timestamp, 'HH'))\n", + "df = df.withColumn('month', from_unixtime(df.events[0].timestamp, 'yyyy-MM'))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----+------------+------+--------------------+---------------+-----+-----------+---+----------+----+-------+\n", + "|brand|country_code|device| events| ip_address|model| partner|ref| timestamp|hour| month|\n", + "+-----+------------+------+--------------------+---------------+-----+-----------+---+----------+----+-------+\n", + "| Itel| ML| sp|[[click, 15358391...| 217.64.103.74| 
P13|searchturbo| m|1535839148| 05|2018-09|\n", + "| Itel| EG| sp|[[click, 15358391...|196.141.135.133| A32F|searchturbo| m|1535839156| 05|2018-09|\n", + "| Itel| NG| sp|[[click, 15358391...| 197.210.226.58| P32|searchturbo| m|1535839161| 05|2018-09|\n", + "| Itel| IN| sp|[[click, 15358391...| 157.48.123.237| A22|searchturbo| m|1535839162| 05|2018-09|\n", + "| Itel| EG| sp|[[click, 15358391...| 105.199.93.33| A32F|searchturbo| m|1535839163| 05|2018-09|\n", + "| Itel| EG| sp|[[click, 15358391...|196.141.135.133| A32F|searchturbo| m|1535839164| 05|2018-09|\n", + "| Itel| MA| sp|[[click, 15358391...| 41.249.147.213| A32F|searchturbo| m|1535839167| 05|2018-09|\n", + "| Itel| CI| sp|[[click, 15358391...| 154.0.26.115| P32| Unknown| m|1535839174| 05|2018-09|\n", + "| Itel| BJ| sp|[[click, 15358391...|197.234.221.243| A32F|searchturbo| m|1535839174| 05|2018-09|\n", + "| Itel| EG| sp|[[click, 15358391...|196.141.135.133| A32F|searchturbo| m|1535839175| 05|2018-09|\n", + "+-----+------------+------+--------------------+---------------+-----+-----------+---+----------+----+-------+\n", + "only showing top 10 rows\n", + "\n", + "CPU times: user 4 ms, sys: 0 ns, total: 4 ms\n", + "Wall time: 370 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "# 按照小时聚合下\n", + "\n", + "group_by_hour = df.select('hour').groupBy('hour').count()\n", + "df.show(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 8 ms, sys: 4 ms, total: 12 ms\n", + "Wall time: 10.4 s\n" + ] + } + ], + "source": [ + "%%time\n", + "# dataframe 转换成pandas\n", + "group_by_hour_pandas_df = group_by_hour.toPandas()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hourcount
hour
00118156
11112722
2285696
3385365
4477598
\n", + "
" + ], + "text/plain": [ + " hour count\n", + "hour \n", + "0 0 118156\n", + "1 1 112722\n", + "2 2 85696\n", + "3 3 85365\n", + "4 4 77598" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 按照访问量进行排序下\n", + "group_by_hour_pandas_df = group_by_hour_pandas_df.sort_values(by='hour')\n", + "\n", + "# 强制转换整数类型\n", + "group_by_hour_pandas_df.hour = group_by_hour_pandas_df['hour'].map(int)\n", + "\n", + "# 将小时设置pandas索引\n", + "group_by_hour_pandas_df.index = group_by_hour_pandas_df.hour\n", + "\n", + "group_by_hour_pandas_df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA4MAAAE/CAYAAAD8ABbdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzs3Xd4VVW+xvHvL4UESOgJLQmhSQeBUCwoiIOgIIgay6jgKDozKk5xLDNzr851xjLOqNjHjqOiCBawIaAgSg1dmgk1CSWhh5KQsu4fZ6MRIYEkZKe8n+fJk3PWXnuf3z45Yt6stdc25xwiIiIiIiJSvQT5XYCIiIiIiIiUP4VBERERERGRakhhUEREREREpBpSGBQREREREamGFAZFRERERESqIYVBERERERGRakhhUEREfGFmcWZ2wMyC/a5FTo6ZvW5mfy+jYzkza1MWxxIRkZJRGBQREV8457Y45yKcc/mlOY6ZbTKzC8uqrurKzPqbWZrfdYiISPlRGBQRkdPCzEL8rkFEn0MRkRNTGBQRqQLM7B4zm3RM2zgze8p7fKOZrTGzLDPbYGa3FurXyMw+NrO9ZrbbzOaY2XH//+BN7RvrHWOnmT12tK+ZjTazb83sCTPbBTxgZkFm9lcz22xmGWb2hpnV9frHe8cL8Z7XNbNXzGybmaWb2d8LTyE1szGFzmG1mfUws/8CccBUb8rp3Seo+4Tn720fbmbLzGy/ma03s8FeewMze83MtprZHjP7sNA+Q7199prZXDPreszPI917vXVmNtBr721mSd7r7DCzx4v4mZ6opmZmNsX7WaWY2ZhC+/xkGuexo33eKOpdZrbCzPaZ2btmFm5mtYHPgGbe+3jAzJqdoLRGZjbdO7fZZtbCO/azZvbvY85hipn9/kTnCFxoZsnee/ismZm3X1Gfm5+NYFqh0WEze8DMJpnZm2a2HxhdxOuLiFRrCoMiIlXDO8DFZhYJ4IWoROBtb3sGMBSoA9wIPGFmPbxtfwTSgCigMfBnwBXxWpcBCUAPYDjwq0Lb+gAbvOP8g8Av4qOBAUArIAJ45gTHfR3IA9oA3YFBwM3e+VwJPADc4J3DpcAu59z1wBZgmDfl9J8nOPYJz9/Meg
NvAH8C6gHnAZu8/f4L1AI6AdHAE94+3YFXgVuBhsB/gClmFmZm7YDbgV7OuUjgokLHGweMc87VAVoDE49XbDE1vUPg59UMuAJ4yMwuOMF5H08iMBhoCXQFRjvnDgJDgK3e+xjhnNt6gv1/CTwINAKWAW957eOBawr9caARcCE/fgaPZyjQy6sjkcB7Baf2uTme4cAkAu/dW8X0FRGpthQGRUSqAOfcZmAJgaAGcAFwyDk339v+iXNuvQuYDXwB9PP65gJNgRbOuVzn3BznXFFh8FHn3G7n3BbgSeCaQtu2Oueeds7lOecOEwgOjzvnNjjnDgD3AVfbMVP3zKwxcDHwO+fcQedcBoHgdbXX5Wbgn865Rd45pHjnfLLvT1HnfxPwqnNuunOuwDmX7pxba2ZNCQSkXzvn9njvzWxvn1uA/zjnFjjn8p1z44EcoC+QD4QBHc0s1Dm3yTm3vtB73cbMGjnnDhz9+RzHiWqKBc4B7nHOZTvnlgEvEwjJJ+sp59xW59xuYCpw5insC/CJc+5r51wO8BfgLDOLdc4tBPYBA71+VwOznHM7ijjWI865vd5n6atCtZzU56YI85xzH3rv3eFTPD8RkWpDYVBEpOp4mx+D2bUUGpExsyFmNt+bWriXQPBq5G1+DEgBvvCmUN5bzOukFnq8mcAI1fG24W0rHNo2AyEERg4LawGEAtu8KYN7CYy2RXvbY4H1nAQze6HQVMc/e21Fnf+Jjh0L7HbO7TnOthbAH4/W6h0zFmjmnEsBfkdgJDPDzN4pNOXyJuAMYK2ZLTKzoSc4jRPV1MyrKatQ22ag+QmOczzbCz0+RGDU7VT88DP2gtpufvwMjAeu8x5fR2BktSS1nOznptgaRUTkxBQGRUSqjveA/mYWQ2CE8G0AMwsDJgP/Aho75+oBnwIG4JzLcs790TnXisD0yz8cvcbtBGILPY4DCk8nPHZEcSuB4FS4fx5w7GhRKoGRtUbOuXreVx3nXKdC21ufoJ6fvKZz7teFpjo+VNz5F3HsVKCBmdU7wbZ/FKq1nnOulnNuglfD2865c71zd8CjXnuyc+4aAiH3UWCSd73e8Y5/vJq2ejVFFmqLA9K9xwcJTGs9qslxjnEiRY0GF/bDz9/MIoAG/PgZeBMYbmbdgA7Ahz/f/aQU9bn5yTl6U6Kjjtn/ZM9FRKRaUxgUEakinHOZwCzgNWCjc26Nt6kGgWmLmUCemQ0hcD0e8MNCKG28xTv2EZjmWFDES/3JzOp7UxbvBN4tou8E4Pdm1tILDg8B7zrn8o6pfRuBqZv/NrM63gIirc3sfK/Ly8BdZtbTAtocXbiEQEBoVUQNRZ4/8Apwo5kN9F63uZm192r6DHjOO99QMzvP2+cl4Ndm1serp7aZXWJmkWbWzswu8EJoNnAY7/00s+vMLMo5VwDs9Y51vPf6RDWlAnOBhy2w8EtXAqONb3r7LSNw7WgDM2tCYITyZO0AGh5dqKUIF5vZuWZWg8C1g/O9unDOpQGLCIwITi7FFM2iPjffA+He+x0K/JXAz1dERE6RwqCISNXyNscs2uFNKRxLYLGSPQSmkE4ptE9bYAZwAJgHPOec+6qI1/gIWEwgeHxCILicyKsEgsHXwEYC4eiOE/S9gUBwW+3VOYnAtYw4594jsCDN20AWgRGnBt5+DwN/9aZr3nXsQYs7f+9atxsJXKO4D5jNj6NS1xO4zm8tgUVofuftkwSMIbCoyR4C02xHe/uEAY8AOwlMg4wmcM0bBBZuWWVmBwgsJnP18QJTMTVdA8QTGD37ALjfOTfD2/ZfYDmBxWa+oOigfuxrriUQwjZ47+WJVhN9G7ifwPTQnvw4LfSo8UAXip8iWpQTfm6cc/uA3xL4A0E6gZFC3R9RRKQErOg1AkRERH5kZg5o610XV9pjtSIwyhNazII1Uol4o6dvEliQSD9XEZEKTCODIiLil87AZgWGqsObtnkn8LJ+riIiFZ/CoIiIlDsz+wPwIlDcyq
VSSZhZBwLXQTYlcMsRERGp4DRNVEREREREpBrSyKCIiIiIiEg1VGwYNLNXzSzDzL4r1Pagma0ws2Vm9sXRFce85bWfMrMUb3uPQvuMMrNk72tUofaeZrbS2+cpb2lzvGWxp3v9p5tZ/bI9dRERERERkeqr2Gmi3qpgB4A3nHOdvbY6zrn93uOxQEfn3K/N7GICSz9fDPQBxjnn+phZAyAJSCBwI9jFQE/n3B4zW0hgye8FBG4C/JRz7jMz+yew2zn3iJndC9R3zt1T3Ak1atTIxcfHn/o7ISIiIiIiUgUsXrx4p3Muqrh+IcV1cM59bWbxx7TtL/S0NoGABzCcQGh0wHwzq2dmTYH+wHTn3G4AM5sODDazWUAd59x8r/0NYASBm/wO9/aDwD2LZgHFhsH4+HiSkpKK6yYiIiIiIlIlmdnmk+lXbBgs4gX+QeAGwfuAAV5zcyC1ULc0r62o9rTjtAM0ds5t8x5vBxqXtFYRERERERH5qRIvIOOc+4tzLhZ4C7i97Eo67ms5fhx9/Bkzu8XMkswsKTMz83SWIiIiIiIiUiWUxWqibwGXe4/TgdhC22K8tqLaY47TDrDDm2KK9z3jRAU45150ziU45xKiooqdGisiIiIiIlLtlWiaqJm1dc4le0+HA2u9x1OA283sHQILyOxzzm0zs2nAQ4VWBB0E3Oec221m+82sL4EFZG4Ani50rFHAI973j0pSK0Bubi5paWlkZ2eX9BAVXnh4ODExMYSGhvpdioiIiIiIVALFhkEzm0BgIZdGZpYG3A9cbGbtgAJgM/Brr/unBFYSTQEOATcCeKHvQWCR1+//ji4mA/wWeB2oSWDhmM+89keAiWZ2k/caiSU9ybS0NCIjI4mPj8e7c0WV4pxj165dpKWl0bJlS7/LERERERGRSqDYW0tUNgkJCe7Y1UTXrFlD+/btq2QQPMo5x9q1a+nQoYPfpYiIiIiIiI/MbLFzLqG4fmVxzWClUJWDIFT98xMRERERkbJVbcKgiIiIiIiI/EhhUEREREREpBpSGCwnmzZtokOHDowZM4ZOnToxaNAgDh8+TP/+/Tl6jePOnTuJj48H4PXXX2fEiBH84he/ID4+nmeeeYbHH3+c7t2707dvX3bv3l3Eq4mIiIiIyOninOOrdRnMW7/L71JKRWGwHCUnJ3PbbbexatUq6tWrx+TJk4vs/9133/H++++zaNEi/vKXv1CrVi2WLl3KWWedxRtvvFFOVYuIiIiICHghcG0GI56by42vLeLlORv8LqlUSnSfwcrsb1NXsXrr/jI9Zsdmdbh/WKdi+7Vs2ZIzzzwTgJ49e7Jp06Yi+w8YMIDIyEgiIyOpW7cuw4YNA6BLly6sWLGi1HWLiIiIiEjxnHPMWpfJkzO+Z3naPmLq1+SRkV0Y2SPG79JKpdqFQT+FhYX98Dg4OJjDhw8TEhJCQUEBANnZ2SfsHxQU9MPzoKAg8vLyyqFiEREREZHq6+h00CdnJLPCC4GPXh4IgaHBlX+SZbULgyczglee4uPjWbx4Mb1792bSpEl+lyMiIiIiUu055/hybQbjZgZCYGyDqhUCj6p2YbCiueuuu0hMTOTFF1/kkksu8bscEREREZFqyznHzDWBELgyPRAC/3l5Vy7r0bxKhcCjzDnndw1lKiEhwR1dnfOoNWvW0KFDB58qKj/V5TxFRERERMrS0RD45Mzv+S59P3ENanH7BW24rHvlDIFmttg5l1BcP40MioiIiIhIteScY8aaDMYVCoGPXdGVEZU0BJ4qhUEREREREalWnHNMX72DcTOTWbV1Py0aVq8QeJTCoIiIiIiIVAvOOb5YvYOnCoXAf13ZjRFnNiOkGoXAo6pNGHTOYWZ+l3HaVLVrP0VEREREysrREDhuRjKrt+0nvmEt/n1lN4ZX0xB4VLUIg+Hh4ezatYuGDRtWyUDonGPXrl2Eh4f7XYqIiIiISIVRUOCFwJnJrNm2n5aNavN4Yjcu7Va9Q+BR1S
IMxsTEkJaWRmZmpt+lnDbh4eHExMT4XYaIiIiIiO8CIXA7T85IZu32LIXAE6gWYTA0NJSWLVv6XYaIiIiIiJxGBQWOaau2M25mIAS2alSbJ67qxrCuCoHHUy3CoIiIiIiIVF3HC4FPXnUmw7o1Izio6l0mVlYUBkVEREREpFIqKHB8vmo742Yks25HFq2iajPu6jMZ2lUh8GQoDIqIiIiISKVSUOD47LvtPDUzEAJbKwSWiMKgiIiIiIhUCoeO5PHB0nRe+3YTKRkHaBMdoRBYCgqDIiIiIiJSoaXuPsR/52/mnYVb2J+dR+fmdRQCy4DCoIiIiIiIVDjOOeZt2MXr325ixpodmBmDOzfhxrPj6dmifpW8f3h5KzYMmtmrwFAgwznX2Wt7DBgGHAHWAzc65/Z62+4DbgLygbHOuWle+2BgHBAMvOyce8Rrbwm8AzQEFgPXO+eOmFkY8AbQE9gFXOWc21RG5y0iIiIiIhXQ4SP5fLgsnde/3cS6HVnUrxXKb/q35rq+LWhat6bf5VUpJ3OzjdeBwce0TQc6O+e6At8D9wGYWUfgaqCTt89zZhZsZsHAs8AQoCNwjdcX4FHgCedcG2APgSCJ932P1/6E109ERERERKqg9L2HefizNfR9eCb3vb+SoCDjn1d0Zd59A/nTRe0VBE+DYkcGnXNfm1n8MW1fFHo6H7jCezwceMc5lwNsNLMUoLe3LcU5twHAzN4BhpvZGuAC4Fqvz3jgAeB571gPeO2TgGfMzJxz7hTOT0REREREKijnHAs37ub1uZuYtmo7AIM7N2H02S3pFa+poKdbWVwz+CvgXe9xcwLh8Kg0rw0g9Zj2PgSmhu51zuUdp3/zo/s45/LMbJ/Xf+exBZjZLcAtAHFxcaU8HREREREROZ2yc/OZsmwrr83dxJpt+6lXK5RbzmvN9We1oHk9jQCWl1KFQTP7C5AHvFU25ZSMc+5F4EWAhIQEjRyKiIiIiFRAW/ce5s35m5mwcAt7DuXSvkkkj4zswvAzm1OzRrDf5VU7JQ6DZjaawMIyAwtN3UwHYgt1i/HaOEH7LqCemYV4o4OF+x89VpqZhQB1vf4iIiIiIlJJOOdI2ryH17/dxOertuOc4xcdGzP67Jb0bdVAU0F9VKIw6K0MejdwvnPuUKFNU4C3zexxoBnQFlgIGNDWWzk0ncAiM9c655yZfUXgmsN3gFHAR4WONQqY523/UtcLioiIiIhUDtm5+Xy8Yhuvz93Id+n7qRMews3ntuS6vi2IbVDL7/KEk7u1xASgP9DIzNKA+wmsHhoGTPeS/Hzn3K+dc6vMbCKwmsD00ducc/necW4HphG4tcSrzrlV3kvcA7xjZn8HlgKveO2vAP/1FqHZTSBAioiIiIhIBbZjfzZvzt/M2wu2sOvgEc5oHMFDl3VhRPdm1Kqh25xXJFbVBtsSEhJcUlKS32X8xEfL0qlVI4QLO0RrGFxEREREqhznHEu27OX1uZv4bOU28p1jYPvG3HhOPGe3bqjfgcuZmS12ziUU10/R/DRzzjF+7iaWbNlL97h6/OmidpzdupHfZYmIiIiIlFpOXj6frNjG63M3sSJtH5HhIYw+O54bzoonrqGmglZ0GhksB7n5BUxanMa4Gcls35/NOW0actegdnSPq+93aSIiIiIipyxjfzZvLdjCWwu2sPNADq2jajP6nJaM7N6c2mEab/LbyY4MKgyWo+zcfN5asIVnv0ph98Ej/KJjY/446AzaN6njd2kiIiIiIsXKzMrhkc/WMmV5OnkFjgvaRTP6nHjObdNIU0ErEIXBCuxATh6vfbORF7/ewIEjeQzv1ozfXXgG8Y1q+12aiIiIiMjPOOeYsnwrD0xZxcGcfK7tE8fos+P1+2sFpTBYCew9dIQXZm/g9bkbyct3JPaKZewFbWlSN9zv0kREREREgMDqoH/5YCUz1mTQPa4ej13RlTbRkX6XJUVQGKxEMvZn88xXKUxYuAUzY9RZLfhN/z
Y0qF3D79JEREREpJpyzvHe4jQe/Hg1ufkF3DWoHTee05LgIE0HregUBiuh1N2HGDczmfeXpFEzNJib+rXi5n4tqRMe6ndpIiIiIlKNpO89zL2TVzAneSe9Wzbgn5d31ZTQSkRhsBJLycji8enf8+nK7dSrFcpvzm/NDWfFU7NGsN+liYiIiEgVVlDgeHvhFh7+dA0OuG9Ie37ZpwVBGg2sVBQGq4CVafv41xfrmP19JtGRYdwxsC1XJcRSIyTI79JEREREpIrZvOsg905eybwNuzi3TSMeHtmF2Aa6V2BlpDBYhSzYsIt/fbGORZv2ENugJr+/8AyGn9lc87VFREREpNTyCxzj527isWnrCAky/jq0A4kJsbpVRCWmMFjFOOeY/X0mj01bx6qt+2kbHcEfB7Xjok6N9R+qiIiIiJTI+swD3D1pBYs372FAuygeGtmFpnVr+l2WlNLJhsGQ8ihGSs/M6N8umvPaRvH5qu38+4t1/PrNxXSNqctdg9rRr61u9CkiIiIiJycvv4CXv9nI49O/p2ZoMI8nduOy7s31+2Q1o5HBSiovv4APlqbz5Ixk0vcepk/LBtw9uB09WzTwuzQRERERqcDWbc/i7knLWZ62j4s6NebBEZ2JjtR9rqsSTROtJnLy8nlnYSpPf5nCzgM5XNA+mj8OOoNOzer6XZqIiIiIVCC5+QU8P2s9T3+ZTJ3wUP42vBOXdGmq0cAqSGGwmjl0JI/xczfzwuz17Ducy9CuTfnDL86gVVSE36WJiIiIiM++S9/HnyatYM22/VzarRn3D+tIw4gwv8uS00RhsJradziXl+ds4JVvNpKTV8AVPWIYe2FbmtfThcAiIiIi1U1OXj5Pz0zh+dnraVC7Bv8Y0ZlBnZr4XZacZgqD1dzOAzk899V63py/GYBf9o3jtgFtaKS/AImIiIhUC0u37OHuSStIzjjA5T1i+N+hHalbK9TvsqQcKAwKAFv3HubpL5OZmJRG2+gIPruzn+aFi4iIiFRh2bn5PD79e16es4HGdcJ5aGQXBrSL9rssKUe6tYQA0KxeTR4e2ZUzY+txz+SVLN68h4R4rTgqIiIiUhUt2rSbuyetYOPOg1zbJ477hrQnMlyjgXJ8QX4XIOVjaNdm1K4RzLuLUv0uRURERETK2MGcPB6YsorE/8wjN7+At27uw0OXdVEQlCIpDFYTtcNCGNatGZ+s3MaBnDy/yxERERGRMjI3ZSeDx33N63M3MeqseKb97jzOadPI77KkElAYrEauTIjl0JF8Plmx1e9SRERERKSUsrJzue/9lVz78gJCgoKYeOtZPHBpJ2qH6UowOTnFhkEze9XMMszsu0JtV5rZKjMrMLOEY/rfZ2YpZrbOzC4q1D7Ya0sxs3sLtbc0swVe+7tmVsNrD/Oep3jb48vihKuzHnH1aBMdwcSkNL9LEREREZFSmLUug0FPfM27i7Zwy3mt+HRsP3q31LoQcmpOZmTwdWDwMW3fASOBrws3mllH4Gqgk7fPc2YWbGbBwLPAEKAjcI3XF+BR4AnnXBtgD3CT134TsMdrf8LrJ6VgZiQmxLB48x5SMg74XY6IiIiInKJ9h3K5673ljH5tERFhIUz+zdn8+eIO1KwR7HdpUgkVGwadc18Du49pW+OcW3ec7sOBd5xzOc65jUAK0Nv7SnHObXDOHQHeAYZb4B4HFwCTvP3HAyMKHWu893gSMNB0T4RSu6x7DCFBxntJWkhGREREpDI4klfA7O8z+fMHKxnw71l8sDSd2we04eOx59I9rr7f5UklVtYTipsD8ws9T/PaAFKPae8DNAT2OufyjtO/+dF9nHN5ZrbP67+zjGuuVqIiw7igfTSTl6Rz10XtCA3WZaMiIiIiFc2BnDxmrcvgi1U7+GptBlk5edSqEUz/dlH8tn8bOjev63eJUgVUiatLzewW4BaAuLg4n6up+BITYvlideAflkGdmvhdjoiIiIgAOw/kMGP1Dqat2s63Kbs4kl
9Ag9o1GNKlCRd1asI5bRoRHqrpoFJ2yjoMpgOxhZ7HeG2coH0XUM/MQrzRwcL9jx4rzcxCgLpe/59xzr0IvAiQkJDgyuZUqq7+7aKIigxjYlKawqCIiIiIj7bsOsQXq7czbdV2kjbvwTmIqV+T689qwaCOjUmIb0BwkK6UktOjrMPgFOBtM3scaAa0BRYCBrQ1s5YEQt7VwLXOOWdmXwFXELiOcBTwUaFjjQLmedu/dM4p6JWBkOAgLu8Rw0tzNpCxP5voOuF+lyQiIiJSLTjnWL1tP9NW7eCLVdtZuz0LgPZNIhl7QVsGdWpMx6Z10FIZUh6KDYNmNgHoDzQyszTgfgILyjwNRAGfmNky59xFzrlVZjYRWA3kAbc55/K949wOTAOCgVedc6u8l7gHeMfM/g4sBV7x2l8B/mtmKd7rXV0WJywBiQkxvDB7Pe8vTefX57f2uxwRERGRKiu/wJG0aXcgAK7eTtqew5hBrxYN+OslHRjUsQlxDWv5XaZUQ1bVBtsSEhJcUlKS32VUCle+MJddB48w8w/n669PIiIiImUoOzefb5J38sXq7cxYk8Hug0eoERzEuW0bcVGnxgzs0JhGEWF+lylVlJktds4lFNevSiwgIyVzZUIsd09aweLNe0iI101KRUREREpj3+FcvlqbwbRV25n9fSaHjuQTGRbCBR2iGdSxCee3iyIiTL9+S8WhT2M1dkmXpvxtyiomJqUqDIqIiIiUwI792XyxOnD937z1u8grcERFhnFZ9+YM6tSEs1o1pEaIbuUlFZPCYDVWOyyEoV2bMXXFVv53WCf9pUpERETkJKzPPMC0Vdv5YtUOlqXuBaBlo9rc1K8lF3Vqwpkx9QjSCqBSCei3/2ousVcs7yal8umKbST2ii1+BxEREZFqJr/AsTxt7w/3AFyfeRCArjF1+dNF7RjUsTFtoiO0BoNUOgqD1VyPuHq0jqrNu0mpCoMiIiIinoM5ecxJzmTGmgy+WpvBroNHCA4y+rRswA1nxfOLjo1pVq+m32WKlIrCYDVnZlzVK5aHPl1LSsYB2kRH+F2SiIiIiC/S9hziy7UZzFiTwfz1uziSX0Cd8BD6t4tmYIdo+p8RTd1aoX6XKVJmFAaFy7rH8Ojn63hvcSr3DengdzkiIiIi5aKgwLEsbS8z1+xg5pqMH24A37JRbW44qwUDOzQmIb4+ocFaAEaqJoVBISoyjAvaRzN5cTp3DWqnf/BERESkygpM/9zJzDU7+GpdBjsPBKZ/9mxRnz9f3J6BHRrTOkozpaR6UBgUAK5KiGX66h3MWpfJLzo29rscERERkTKTvvcwX67ZwYw1GczbsIsjeQVEetM/L+wQzflnRFGvVg2/yxQpdwqDAkD/dlFERYbx7qJUhUERERGp1Aq81T9nrslgxpodP0z/jG9Yi+v7tmBgh2h6xTfQbCip9hQGBYCQ4CAu7xHDS3M2kJGVTXRkuN8liYiIiJy0Q0d+nP755dpMdh7IIcggoUUD7htydPpnbd3+QaQQhUH5wZUJMbwwez3vL0nn1+e39rscERERkSJt3XuYmWszmLlmB3PXe9M/w0I4r10UF3qrf9avremfIieiMCg/aB0VQUKL+kxMSuXW81rpL2ciIiJSoRQUOFak7/th9c/V2/YD0KJhLa7r04ILO0TTq6Wmf4qcLIVB+YnEXrHcPWkFS7bsoWeLBn6XIyIiIlWcc46DR/LJys4lKzuPrOxc9mfnkZWdxwHveVZ2Htv2ZfN1ciaZWYHpnz1b1OfeIe25sEM0raMi9EdskRJQGJSfuKRLU/42ZRXvLkpVGBQREZEi5Rc4DuTkFQpyhR7/pP04273HB3LyKHBFv06QQf1aNejbqmHg5u/tommg6Z8ipaYwKD9ROyyEoV2bMXXFVu4f1onaYfqIiIiIVFf7DufyxartfJOyk72Hcn8S5A7kBL6KExpsRIaHEhEWQmR44Cu2QS3hZSduAAAgAElEQVQiw0OoEx76Q9
tP+4RSx/seGR5CrRrBGvkTOQ30m778TGKvGN5NSuWTFdtI7BXrdzkiIiJSjg4dyWPmmgymLN/K7HWZHMkvoHGdMBrXCScyPIToyPAfAtuPQa7w8x/b64SHEhYSpCAnUkEpDMrP9IirT+uo2kxMSlUYFBERqQZy8vL5+vudTFm+lRmrd3A4N5/GdcK4/qwWDOvWjG4xdRXoRKoghUH5GTMjMSGWhz9bS0rGAdpER/hdkoiIiJSxvPwC5m3YxdTlW/n8u+3sz86jfq1QRvZozrBuzegV34DgIAVAkapMYVCO67IezfnntHW8tziV+4Z08LscERERKQMFBY7FW/YwdflWPl25jZ0HjhAZFsKgTk0Y1q0p57RppNsyiFQjCoNyXNGR4VzQPprJi9O5a1A7/Y9BRESkknLO8V36fqau2MrHy7eydV82YSFBXNihMcO6NaN/uyjCQ4P9LlNEfKAwKCeUmBDL9NU7mLUuk190bOx3OSIiInIKUjKymLJsK1NXbGPjzoOEBhvntY3i7sHtubBjYyK0YrhItad/BeSEBrSLIioyjIlJqQqDIiIilUDq7kNMWb6Vqcu3snZ7FkEGZ7VuyK3ntWJw5ybUq6V784nIj4oNg2b2KjAUyHDOdfbaGgDvAvHAJiDRObfHAstMjQMuBg4Bo51zS7x9RgF/9Q77d+fceK+9J/A6UBP4FLjTOedO9BqlPmM5aSHBQYzs0ZyX52wkIyub6Mhwv0sSERGRY+zYn83HK7YxdflWlqXuBaBni/o8MKwjF3dtqv9/i8gJncyFYK8Dg49puxeY6ZxrC8z0ngMMAdp6X7cAz8MP4fF+oA/QG7jfzOp7+zwPjCm03+BiXkPKUWJCLPkFjg+WpPtdioiIiHj2HDzC2wu2cPWL8+j78Ewe/Hg1ufkF3DukPXPuHsDk35zN6HNaKgiKSJGKHRl0zn1tZvHHNA8H+nuPxwOzgHu89jeccw6Yb2b1zKyp13e6c243gJlNBwab2SygjnNuvtf+BjAC+KyI15By1DoqgoQW9Xk3KZVbzmulewyJiIj4JCs7l+mrdzBl+Va+Sd5JXoGjVVRt7hzYlqFdm+lWUCJyykp6zWBj59w27/F24OgFZc2B1EL90ry2otrTjtNe1GtIOUtMiOXuyStYsmUPPVs08LscERGRaiM3v4Dpq3cwdflWvlybQU5eAc3r1eSmfi25tFszOjatoz/UikiJlXoBGe/6PlcWxZT0NczsFgLTUomLizudpVRLl3RtygNTVzFxUZrCoIiISDlJ33uY299ewtIte2kUEcY1veMY1q0ZPeLqKQCKSJkoaRjcYWZNnXPbvGmgGV57OhBbqF+M15bOj1M+j7bP8tpjjtO/qNf4Gefci8CLAAkJCac1mFZHtcNCGNq1KR+v2Mr/DutIbS1FLSIiclrNXLODP0xcTn6B48mrzmRYt2YEBykAikjZKumdxKcAo7zHo4CPCrXfYAF9gX3eVM9pwCAzq+8tHDMImOZt229mfb2VSG845ljHew3xwVW9Yjl4JJ9PVm4rvrOIiIiUSG5+AQ9/uoabxifRvF5Npt5xLiO6N1cQFJHT4mRuLTGBwKheIzNLI7Aq6CPARDO7CdgMJHrdPyVwW4kUAreWuBHAObfbzB4EFnn9/u/oYjLAb/nx1hKfeV8U8Rrigx5x9WkVVZuJi1JJTIgtfgcRERE5JVv3HuaOCUtZvHkPv+wTx/8M7Uh4aLDfZYlIFWaBhT+rjoSEBJeUlOR3GVXSC7PX88hna5n5x/NpHaUVy0RERMrKV+sy+MO7yziSV8DDl3fl0m7N/C5JRCoxM1vsnEsorl9Jp4lKNTSyR2CaysSk1OI7i4iISLHy8gt49PO13PjaIhrXCWfqHecqCIpIuVEYlJMWHRnOgHbRTF6cTm5+gd/liIiIVGrb92VzzUvzeX7Weq7pHceHt51DK828EZFypDAop+SqXrHsPJDDrHWZfpciIiJSac3+PpOLn5rDqq
37efKqM3l4ZBddHygi5U5hUE5J/3ZRNIoI01RRERGREsjLL+CxaWsZ9epCoiLCmHJ7YLVQERE/6IZxckpCg4O4vGdzXp6zkYysbKIjw/0uSUREpFLYsT+bsROWsmDjbq5KiOWBSztRs4ZGA0XEPxoZlFN2Zc9Y8gscHyxJ97sUERGRSmFOciYXj5vDirR9PJ7YjUev6KogKCK+UxiUU9YmOoKeLeozMSmVqnZrEhERkbKUX+B4/It13PDqQhpG1GDqHecwskeM32WJiAAKg1JCVyXEsj7zIEu27PG7FBERkQopY382v3x5Pk99mcIVPWL48LZzaBMd6XdZIiI/UBiUErm4a1Nq1Qhm4qI0v0sRERGpcL5N2cnFT33DstS9/OvKbjx2ZTdq1dBSDSJSsSgMSolEhIUwtGtTPl6xlYM5eX6XIyIiUiHkFzienPE9172ygHq1Qply+7lc0VPTQkWkYlIYlBJLTIjl4JF8Plm5ze9SREREfJeZlcMNry7gyRnJXNa9OVNuP4czGmtaqIhUXJqvICXWs0V9WkXV5r2kVBITYv0uR0RExDdz1+/kzneWkZWdyz+v6MqVPWMwM7/LEhEpkkYGpcTMjMSEWBZt2sP6zAN+lyMiIlLu8gscT81M5rqXF1AnPISPbjuXxIRYBUERqRQUBqVURvZoTnCQ8V6SFpIREZHqZeeBHEa/tpDHp3/Ppd2aMeX2c2nXRNNCRaTyUBiUUomODGdAu2gmL0kjL7/A73JERETKxfwNu7h43BwWbtzNIyO78MRVZ1I7TFffiEjlojAopZaYEENmVg6z1mX6XYqIiMhpVVDgeParFK59aT4RYSF8eNs5XN07TtNCRaRS0p+wpNQGtI+mUUQY7yalcmHHxn6XIyIiclrsOpDD7ycu5+vvM7m0WzMeGtmFCI0Gikglpn/BpNRCg4O4vEdzXv5mIxlZ2URHhvtdkoiISJlauHE3d0xYwp5DuTx0WReu6a1FYkSk8tM0USkTVybEkl/g+HBput+liIiIlJmCAsdzs1K45qX51AwN5oPfns21fTQtVESqBoVBKRNtoiPo2aI+7y5KxTnndzkiIiKllpGVzU3jF/HPz9cxpHMTpt5xLp2a1fW7LBGRMqNpolJmEhNiuGfySpZs2UvPFvX9LkdERKREUncf4sWvN/BuUio4eHBEZ67TaKCIVEEKg1JmLunajL9NXc3ERakKgyIiUukk78ji+dnr+WjZVoIMrugZw63ntSa+UW2/SxMROS0UBqXMRISFcEmXpny8Yiv/O6yj7rckIiKVwoq0vTz7VQrTVu2gZmgwo8+OZ0y/VjSpqwXRRKRqK9U1g2Z2p5l9Z2arzOx3XlsDM5tuZsne9/peu5nZU2aWYmYrzKxHoeOM8vonm9moQu09zWylt89TpvkZFd5VvWI5eCSfT1Zu87sUERGRE3LOMW/9Lq5/ZQGXPvMt89bvYuzAtnx77wX8z9COCoIiUi2UeOjGzDoDY4DewBHgczP7GLgFmOmce8TM7gXuBe4BhgBtva8+wPNAHzNrANwPJAAOWGxmU5xze7w+Y4AFwKfAYOCzktYsp1/PFvVp1ag27yWlkpgQ63c5IiIiP+Gc48u1GTz7VQpLtuylUUQY9w1pz7V94ogMD/W7PBGRclWaeXwdgAXOuUMAZjYbGAkMB/p7fcYDswiEweHAGy6w1OR8M6tnZk29vtOdc7u940wHBpvZLKCOc26+1/4GMAKFwQrNzLgyIZZHP1/LhswDtIqK8LskERER8gscn6zcxnNfpbB2exYx9Wvy4IjOXNkzhvDQYL/LExHxRWmmiX4H9DOzhmZWC7gYiAUaO+eOzhHcDjT2HjcHUgvtn+a1FdWedpx2qeAu79Gc4CBjYlJa8Z1FREROo5y8fCYs3MLAf89i7ISl5BU4Hk/sxld39ef6vi0UBEWkWivxyKBzbo2ZPQp8ARwElgH5x/RxZnbabzpnZrcQmJ5KXFzc6X45KUZ0nX
AGtIti8pI07hp0BiHBup2liIiUr0NH8nh7wRZemrOBHftz6BpTlxeu68mgjo0JCtISBCIiUMrVRJ1zrwCvAJjZQwRG73aYWVPn3DZvGmiG1z2dwMjhUTFeWzo/Tis92j7La485Tv/j1fEi8CJAQkKC7nheASQmxDJjTQaz1mVyYcfGxe8gIiJSBvYdymX8vE289u1G9hzK5axWDfnXld04t00j3SdQROQYpQqDZhbtnMswszgC1wv2BVoCo4BHvO8fed2nALeb2TsEFpDZ5wXGacBDR1cdBQYB9znndpvZfjPrS2ABmRuAp0tTr5SfAe2jaRRRg4lJqQqDIiJy2mVkZfPKNxt5c95mDh7J58IO0fymfxvd91ZEpAilvRHcZDNrCOQCtznn9prZI8BEM7sJ2Awken0/JXBdYQpwCLgRwAt9DwKLvH7/d3QxGeC3wOtATQILx2jxmEoiNDiIkT1iePWbjWRm5RAVGeZ3SSIiUgWl7j7Ef75ez8SkNPLyCxjatRm/6d+aDk3r+F2aiEiFZ4HFPauOhIQEl5SU5HcZAqRkZHHh41/z54vbc8t5rf0uR0REqpDkHVk8P2s9Hy3fSpDBFT1juPW81sQ3qu13aSIivjOzxc65hOL6lXZkUOSE2kRH0iOuHhOT0hjTr5Wu1RARkVJbnrqX52alMG3VDmqGBjP67HjG9Gulm8SLiJSAwqCcVlf1iuWeyStZsmWvrtsQEZEScc4xf8NunpuVwpzkndQJD2HswLaMPjueBrVr+F2eiEilpTAop9UlXZvxt6mreS8pVWFQREROiXOOmWsyeHZWCku37KVRRBj3DWnPtX3iiAwP9bs8EZFKT2FQTquIsBAu6dKUqcu38j9DO1I7TB85EREp3oINu7h/yirWbs8ipn5NHhzRmSt7xugm8SIiZUh3A5fTLrFXLAeP5PPpym1+lyIiIpXAvPW7GPXaQg7n5vN4Yje+uqs/1/dtoSAoIlLGFAbltEtoUZ9WjWozMSnV71JERKSCW7RpNzeNX0Rs/Vq8/5uzGdkjhtBg/boiInI66F9XOe3MjCsTYlm0aQ8bMg/4XY6IiFRQS7bs4cbXFtGkbjhvjelDwwjdo1ZE5HRSGJRycXmP5gQHGW/O38Lug0c4mJNHXn6B32WJiEgFsSJtL6NeXUijiBpMGNOX6EjdKkJE5HTTah5SLqLrhDOgXRSvfruRV7/d+EN7SJARFhJEWGhw4HtIEGEhwYSHBr6Hhf7Y9pN+oYXaQoIIDw0+7nHCQoMIP+Y4dWqGEBai605ERCqKVVv3cf0rC6lXK5S3x/SlcR0FQRGR8qAwKOXm7yO6MLBDBjm5+eTkFXhf+eTkFpDtff+hLa+AnNwCDubksetAoba8gp/sXxJ1a4by6OVdGdy5SRmfoYiInKp127O47uUF1K4RzNs396VZvZp+lyQiUm0oDEq5aVI3nGt6x5XZ8ZxzPwuVOXkFZP8QFn8MlT8+zue9xWn8+s3FjD47nvsubq9RQhERn6RkZPHLl+dTIySICbf0JbZBLb9LEhGpVhQGpdIyM8JDg72lxk/+5sOJvWJ55LO1vPbtJhZv3sOz1/YgrqF+ARERKU8bMg9wzUsLMDPeHtOXFg1r+12SiEi1owVkpNoJCwnm/mGdeOG6nmzedZBLnpqjeyCKiJSjzbsOcu1LCygocLx9cx9aR0X4XZKISLWkMCjV1uDOTfhkbD9aRUfw27eWcP9H35GTl+93WSIiVVrankNc+9ICcvLyeWtMH9o2jvS7JBGRakthUKq12Aa1eO/Ws7jp3JaMn7eZy5+fy6adB/0uS0SkStq27zDXvDSfrOxc/ntTH9o3qeN3SSIi1ZrCoFR7NUKC+J+hHXnphgRSdx9m6NPf8PGKrX6XJSJSpezYn821Ly1g78FAEOzcvK7fJYmIVHsKgyKeX3RszCdjz6Vt4whuf3spf/1wJdm5mjYqIlJamVk5XPvSfDL2Z/P6r3rTLbae3yWJiA
gKgyI/EVO/FhNvPYtbzmvFm/O3MPK5uWzUtFERkRLbdSCHX748n617s3ntxt70bFHf75JERMSjMChyjNDgIP58cQdeGZXA1n2HGfrUHKYs17RREZFTtffQEa57ZSGbdx3ildEJ9G7ZwO+SRESkEIVBkRMY2KExn47tR/umdRg7YSn3va9poyIiJ2vf4Vyuf2Uh6zMP8NINCZzdupHfJYmIyDEUBkWK0KxeTd65pS+3nt+KCQu3MOLZb1mfecDvskREKrSs7FxGvbqQtdv385/renLeGVF+lyQiIsehMChSjNDgIO4b0oHXRvdix/5shj39DR8uTfe7LBGRCulgTh43vraI79L38ey1PRjQPtrvkkRE5AQUBkVO0oD20Xx6Zz86NavD795dxr2TV3D4iKaNiogcdfhIPr96fRFLU/fy9DXdGdSpid8liYhIEUoVBs3s92a2ysy+M7MJZhZuZi3NbIGZpZjZu2ZWw+sb5j1P8bbHFzrOfV77OjO7qFD7YK8txczuLU2tImWhad2aTBjTl9/2b807i1IZ8ey3pGRo2qiISHZuPmPeSGLRpt08ntiNIV2a+l2SiIgUo8Rh0MyaA2OBBOdcZyAYuBp4FHjCOdcG2APc5O1yE7DHa3/C64eZdfT26wQMBp4zs2AzCwaeBYYAHYFrvL4ivgoJDuLuwe0Z/6veZB7IYdjT3zB5cZrfZYmI+CYnL59b/7uYb9fv5LErujH8zOZ+lyQiIiehtNNEQ4CaZhYC1AK2ARcAk7zt44ER3uPh3nO87QPNzLz2d5xzOc65jUAK0Nv7SnHObXDOHQHe8fqKVAjnnxHFp2P70SWmLn98bzl/em+5po2KSLVzJK+A295awuzvM3lkZBcu7xnjd0kiInKSShwGnXPpwL+ALQRC4D5gMbDXOZfndUsDjv55sDmQ6u2b5/VvWLj9mH1O1C5SYTSpG87bN/fhjgvaMGlJGpc+8w3JO7L8LktEpFzk5hdwx4QlzFiTwd9HdOaqXnF+lyQiIqegNNNE6xMYqWsJNANqE5jmWe7M7BYzSzKzpMzMTD9KkGosJDiIPw5qxxu/6s3ug0e49JlveS8ptfgdRUQqsbz8An7/7jKmrdrB/cM6cl3fFn6XJCIip6g000QvBDY65zKdc7nA+8A5QD1v2ihADHB0Df50IBbA214X2FW4/Zh9TtT+M865F51zCc65hKgo3ctI/NGvbRSf3dmPbrF1+dOkFfxh4jIOHckrfkcRkUomv8Dxp0kr+HjFNv5ycQduPKel3yWJiEgJlCYMbgH6mlkt79q/gcBq4CvgCq/PKOAj7/EU7zne9i+dc85rv9pbbbQl0BZYCCwC2nqrk9YgsMjMlFLUK3LaRdcJ562b+3LnwLZ8sDSdS5/5lnXbNW1URKqOggLHvZNX8MHSdP50UTvGnNfK75JERKSESnPN4AICC8EsAVZ6x3oRuAf4g5mlELgm8BVvl1eAhl77H4B7veOsAiYSCJKfA7c55/K96wpvB6YBa4CJXl+RCi04yPj9L87gzZv6sPdQLsOf/YZ3F20h8LcPEZHKyznHXz78jvcWp/G7C9ty24A2fpckIiKlYFXtF9SEhASXlJTkdxkiAGRkZfO7d5Yxd/0uLuvenL+P6EztsJDidxQRqWCcczwwZRXj523mtgGtuWtQOwITg0REpKIxs8XOuYTi+pX21hIiUoToyHD+e1Mffn/hGXy0LJ1hz3zDmm37/S5LROSUOOf4+ydrGD9vM7ec10pBUESkilAYFDnNgoOMOy9sy5s39yErO48Rz37L2ws0bVREKgfnHI9+vo5XvtnI6LPjuW9IewVBEZEqQmFQpJyc3boRn47tR++WDfjzByu57e0l7DuU63dZIiJFemJGMi/MXs91feO4f1hHBUERkSpEYVCkHEVFhjH+xt7cM7g9X6zawZBxX7Nw426/yxIROa6nZybz1MxkrkqI5f8u7awgKCJSxSgMipSzoCDjN/1bM/k3Zx
MaEsTVL87j8enfk5df4HdpIiI/eGH2ev49/XtG9mjOwyO7EBSkICgiUtUoDIr4pFtsPT4Z24/Lusfw1MxkEv8zj9Tdh/wuS0SE/87fzCOfreXSbs147IpuCoIiIlWUwqCIjyLCQvh3YjfGXX0myTsOcPG4OUxZvtXvskSkGvt+RxYPTl3NBe2jeTyxG8EKgiIiVZbCoEgFMPzM5nx6Zz/aNo5g7ISl3PXecg7k5PldlohUM7n5Bfxx4nIiwkP45xVdCQnWrwkiIlWZ/pUXqSBiG9Ri4q1nMfaCNry/JI2hT81heepev8sSkWrkua/WszJ9H/8Y0ZlGEWF+lyMiIqeZwqBIBRISHMQfBrVjwpi+HMkr4PLn5/LC7PUUFOiehCJyen2Xvo+nv0xm+JnNGNKlqd/liIhIOVAYFKmA+rRqyGd3nsegTo155LO1XPfKAnbsz/a7LBGponLy8rnrveU0qF2Dv13aye9yRESknCgMilRQdWuF8uy1PXj08i4s3bKXwU9+zfTVO/wuS0SqoHEzklm7PYtHLu9CvVo1/C5HRETKicKgSAVmZlzVK46pd5xLs3o1GfNGEv/z4Xdk5+b7XZqIVBFLt+zhhdnrSUyI4YL2jf0uR0REypHCoEgl0CY6gvd/ezZj+rXkv/M3c+kz37B2+36/yxKRSi47N58/vrecJnXC+evQjn6XIyIi5UxhUKSSCAsJ5i+XdGT8r3qz+2Aulz7zLePnbsI5LS4jIiXz2LR1bMg8yD+v6Ead8FC/yxERkXKmMChSyZx/RhSf/64f57RuyP1TVnHz+CR2HcjxuywRqWQWbtzNq99u5Pq+LTi3bSO/yxERER8oDIpUQo0iwnh1dC/uH9aROck7GTJuDt8k7/S7LBGpJA7m5HHXe8uJrV+Le4e097scERHxicKgSCVlZtx4Tks+vO0c6tQM5bpXFvDwp2s4klfgd2kiUsE9/NkaUvcc4l9XdqN2WIjf5YiIiE8UBkUquY7N6jD19nP5ZZ84/vP1Bi5/fi4bMg/4XZaIVFDfJO/kzflbuOmclvRu2cDvckRExEcKgyJVQM0awfzjsi785/qepO45xNCnv2FiUqoWlxGRn9ifncvdk5bTOqo2d13Uzu9yRETEZwqDIlXIRZ2a8Nmd/egaU5e7J63gjglL2Xc41++yRKSCeHDqarbvz+ZfV3YjPDTY73JERMRnCoMiVUzTujV56+a+/Omidnz23XYuHjeHpE27/S5LRHw2c80O3lucxm/6t6Z7XH2/yxERkQpAYVCkCgoOMm4b0IZJvz6L4CAj8T/zeHLG9+Tla3EZkepoz8Ej3Pv+Sto3iWTswLZ+lyMiIhVEicOgmbUzs2WFvvab2e/MrIGZTTezZO97fa+/mdlTZpZiZivMrEehY43y+ieb2ahC7T3NbKW3z1NmZqU7XZHqpXtcfT4Zey4jzmzOkzOSueal+aTtOeR3WSJSzu6fsoo9B4/w78RuhIVoeuj/t3fn4VWUd//H399sJCQhi4QAWQgqoMgWCAjUtSqituJS91Zccfm5tFpbW59q1S7aqo9aH1GsC1hFEbTgVsG1VkQJQthBqAGCIYAJCRACJLl/f8wAUUNQQmayfF7Xda4zZzIn94eTw33me+aee0RExLPfxaBzbplzboBzbgAwCKgEXgFuBd5xzvUA3vEfA5wC9PBvY4CxAGaWCtwBHAkMAe7YVUD621xZ53kj9zevSFuVGBvNA+cN4MHzBrCkeDOnPPQhr83/MuxYIhKQNxcUM63gS244oQdHdE0KO46IiDQjB2qY6AnASufcKmAUMN5fPx44w18eBUxwnllAspl1AU4GZjjnSp1zZcAMYKT/sw7OuVnOmxJxQp3fJSLf0xm5Gbxxw9EckpbAdc/P5VeTC6jaWRN2LBFpQhu3bOe2fy6kb0YS1xx3SNhxRESkmTlQxeD5wER/Od05V+wvrwPS/eUMYE2d5xT56xpaX1TPehHZT9kHteelq4dx3f
GHMim/iEue/pSKKs02KtIaOee47ZUFbKmq5v5z+xMdqWkCRETk6xr9yWBmMcDpwEvf/Jl/RK/JL3RmZmPMLN/M8jds2NDUzYm0aNGREfzy5F48dP4A5qwq47zHZ7G+oirsWCJygE2d9yVvLSrh5hE96ZmeGHYcERFphg7E14SnAJ8550r8xyX+EE/8+/X++rVAVp3nZfrrGlqfWc/6b3HOjXPO5Tnn8tLS0hr5zxFpG0YNyODJ0YNZ9dVWzn5sJoUbt4YdSUQOkJKKKm6fupBB3VK44uiDw44jIiLN1IEoBi9gzxBRgGnArhlBRwNT66y/2J9VdChQ7g8nfQsYYWYp/sQxI4C3/J9VmNlQfxbRi+v8LhE5AI7pmcbzVw5l6/Yazh47kwVF5WFHEpFGcs7x6ynz2VFTy33n9CcyQhNxi4hI/RpVDJpZPHAS8HKd1fcAJ5nZ58CJ/mOAN4D/AiuAJ4BrAZxzpcDdwGz/dpe/Dn+bv/vPWQm82Zi8IvJtA7KSeenqYcRGR3L+uI/5z+cbw44kIo0wKX8N7y/bwK0jD6N7x/iw44iISDNm3ml9rUdeXp7Lz88PO4ZIi1NSUcXopz5l5YYt/O95A/hRv65hRxKR76morJKRD35I34wknrviSCJ0VFBEpE0ysznOubx9baepxUQEgPQOsbx41TBys1K4fuJcJnxcGHYkEfkeamsdv5o8H+ccf/lJPxWCIiKyTyoGRWS3pLhoJlw+hBMPT+f2qYu4f/oyWtvoAZHW6h+frGLmyq+47bTeZKW2DzuOiIi0ACoGReRrYqMjGXvRQM7Ly+Jv767gt68soLqmNuxYItKAwo1b+fMbSzmmZxoXDMna9xNERESAqLADiEjzExUZwT1n9yUtsR2PvLeCr7bs4OELcomNjgw7mrEU03EAABtASURBVIh8Q02t45bJBURFGvee3RdvAm4REZF905FBEamXmfHLk3vx+x/3ZsaSEi5+6lPKt+0MO5aIfMNT//mC2YVl/P7HR9AlKS7sOCIi0oKoGBSRBl3yg+48dH4uc1eXcd7jH7O+oirsSCLiW7F+M3+dvoyTeqdz1sCMsOOIiEgLo2JQRPbp9P5deeqSwawureSssTP5YuPWsCOJtHnVNbXcPKmA+JhI/nSmhoeKiMj3p2JQRL6To3uk8cKYoVTuqOEnY2cyv2hT2JFE2rTHPlhJQVE5d5/Rh7TEdmHHERGRFkjFoIh8Z/0yk5l89TDiYiK5YNwsPvx8Q9iRRNqkxV9W8NA7n/Ojfl34Ub+uYccREZEWSsWgiHwvB6clMOWa4WSltueyZ2YzreDLsCOJtCk7qmu5+aUCkuJiuHtUn7DjiIhIC6ZiUES+t/QOsbx41TBys1O4YeJcnv7oi7AjibQZf3v3c5YUV/Dns/qSEh8TdhwREWnBVAyKyH5JiotmwmVDGNE7nTtfXcx9by3DORd2LJFWrWDNJh59fyVnD8zkpN7pYccREZEWTsWgiOy32OhIHr1oIBcMyeKR91Zw65QFVNfUhh1LpFWq2lnDzS8VkJbQjtt/3DvsOCIi0gpEhR1ARFq2qMgI/nRmX9IS2vHwuysordzB3y7IJTY6MuxoIq3KAzOWs2L9FsZfNoSkuOiw44iISCugI4Mi0mhmxk0jenHn6Ufw9pISfvbkJ5RX7gw7lkirkV9YyhMf/pcLj8zm2J5pYccREZFWQsWgiBwwo4fn8PD5ucxbs4lzH/+YkoqqsCOJtHiVO6r55UsFZCTH8dtTDw87joiItCIqBkXkgPpx/648fckQisoqOevRmazcsCXsSCIt2r1vLqXwq0r++pP+JLTT2R0iInLgqBgUkQPuqB4deWHMMKp21nDOYx9TsGZT2JFEWqSZKzYy/uNVXPqDHIYdclDYcUREpJVRMSgiTaJvZhKTrxlO+5hILnhiFv9eviHsSCItyuaqndwyeT7dO8bzq5MPCzuOiIi0QioGRaTJdO8Yz8vXDCc7tT2XPTObqf
PWhh1JpMX44+tLKC7fxn3n9CcuRrPziojIgadiUESaVKcOsUy6ehiDuqVw4wvzeOo/X4QdSaTZe2/Zel6YvYYxxxzCoG4pYccREZFWSsWgiDS5DrHRjL9sCCOP6Mxdry3m3n8txTkXdiyRZqm8cie3TplPz/QEfnFSj7DjiIhIK6ZiUEQCERsdyf9dNJALhmQz9v2V/HrKfKprasOOJdKslFRUcd3Ez9i4ZQf3nzOAdlEaHioiIk2nUcWgmSWb2WQzW2pmS8xsmJmlmtkMM/vcv0/xtzUze9jMVpjZfDMbWOf3jPa3/9zMRtdZP8jMFvjPedjMrDF5RSRckRHGn87sww0n9GBSfhFX/2MO23bUhB1LJHQ1tY4JHxdy4v0f8MkXpdw16gj6ZiaFHUtERFq5xh4ZfAj4l3PuMKA/sAS4FXjHOdcDeMd/DHAK0MO/jQHGAphZKnAHcCQwBLhjVwHpb3NlneeNbGReEQmZmXHTST25a9QRvLN0PRf+fRalW3eEHUskNIu+LOesRz/i9qmL6J+VzPSfH8NFR3YLO5aIiLQB+10MmlkScAzwJIBzbodzbhMwChjvbzYeOMNfHgVMcJ5ZQLKZdQFOBmY450qdc2XADGCk/7MOzrlZzju5aEKd3yUiLdzFw3J49MKBLPqygrPHzmT1V5VhRxIJ1Nbt1fzx9cWc/shHrN20jYfOH8Czlw8hp2N82NFERKSNaMyRwe7ABuBpM5trZn83s3gg3TlX7G+zDkj3lzOANXWeX+Sva2h9UT3rv8XMxphZvpnlb9iga5mJtBSn9O3C81ccSenWHZw19iPmF+ni9NI2vL24hJMe+IAnPvyCc/OyeOem4xg1IAOdDSEiIkFqTDEYBQwExjrncoGt7BkSCoB/RK/Jpwx0zo1zzuU55/LS0tKaujkROYDyclKZcs1w2kVFcv64Wby3bH3YkUSaTHH5Nq56Np8rJuSTEBvF5KuH8eez+pLUPjrsaCIi0gY1phgsAoqcc5/4jyfjFYcl/hBP/Ptde3Zrgaw6z8/01zW0PrOe9SLSyhzaKYFXrh1O947xXDE+n0mz1+z7SSItSE2t4+mPvuDE+z/gg+Ub+NXIXrx2/dHk5aSGHU1ERNqw/S4GnXPrgDVm1stfdQKwGJgG7JoRdDQw1V+eBlzszyo6FCj3h5O+BYwwsxR/4pgRwFv+zyrMbKg/i+jFdX6XiLQynTrE8uJVwxh+yEH8asp8Hnx7ua5FKK3CgqJyzvi/j7jz1cXk5aQy/efHcu1xhxITpas7iYhIuKIa+fzrgefMLAb4L3ApXoE5ycwuB1YB5/rbvgGcCqwAKv1tcc6VmtndwGx/u7ucc6X+8rXAM0Ac8KZ/E5FWKqFdFE9dMphbpyzgwbc/Z115FX84ow9RkdpplpZny/Zq7p++jPEzCzkooR2PXJjLaX276LxAERFpNqy1ffOel5fn8vPzw44hIo3gnOOBGcv527srOL5XGo9cOJD4do397kokOG8tWscdUxdRsrmKi47M5paTDyMpTucFiohIMMxsjnMub1/bae9KRJodM+PmEb3onBTL7/65kAuemMWToweTltgu7GgiDVq7aRt3TF3E20tKOKxzIo/+dCADs1P2/UQREZEQqBgUkWbroiO7kZ4Yy3UTP+PssTN55tLBHJyWEHYskW+prqnlmZmFPDBjOc7Bb089jEt/0J1oDXEWEZFmTJ9SItKsndg7nYlXDmXL9mrOHjuTz1aXhR1J5GsK1mzi9Ec+4g+vL2HowQcx/RfHMOaYQ1QIiohIs6dPKhFp9nKzU5hyzXA6xEVz4ROzmLG4JOxIIlRU7eSOqQs549GP+GrrdsZeNJAnR+eRldo+7GgiIiLfiYpBEWkRuneMZ8o1w+mVnshVz+bz7KxVYUeSNso5xxsLijnx/g+YMGsVo4fl8PZNx3KKZgoVEZEWRucMikiL0TGhHRPHDOW65+fyu38upHjTNm45uZd2wCUwa0oruWPaIt
5dup7eXTow7uI8BmQlhx1LRERkv6gYFJEWpX1MFON+NojfTV3Io++vZF1FFfec1U8X8JYmtbOmlqf+8wUPvv05ZvA/px3OJcNzdA1MERFp0VQMikiLExUZwZ/O7EuXpDgemLGcDZu38+hFA0mM1XXc5MCbs6qM215ZwNJ1mzmpdzq/P/0IMpLjwo4lIiLSaCoGRaRFMjNuOKEHnZNi+c3LCzjv8Vk8felg0jvEhh1NWonybTv561tLee6T1XTuEMvjPxvEyUd0DjuWiIjIAaNiUERatHPzskjvEMs1/5jDWY/OZPxlgzm0U2LYsaQFc87x2vxi7nptMV9t2c6lw7tz04ieJLTTR6aIiLQuOtlBRFq8Y3umMemqYWyvruXssR8zu7A07EjSAjnnWF6ymdFPz+b6iXPp3CGWadcdxe0/7q1CUEREWiVzzoWd4YDKy8tz+fn5YccQkRCsKa1k9NOfUlS2jYfOG8ApfbuEHUmaseqaWhYXVzC7sIz8wlJmF5axcct24mMiueXkXvxsWA6REZqpVkREWh4zm+Ocy9vndioGRaQ1Kdu6g8vHz2bumk387rTeXHZU97AjSTOxZXs1c1eX7S7+5q3ZROWOGgAyU+IYnJNKXk4KJx2eTiedeyoiIi3Ydy0GNe5FRFqVlPgYnr9yKDdMnMtdry2muHwbvznlcCJ0hKfNKamoIr+wjNmFpeSvKmXxlxXUOogwOLxLB87NyyIvJ4W8bql0TlLxJyIibY+KQRFpdWKjIxn700Hc+eoinvjwC9ZVbOe+c/rRLioy7GjSRGprHSs3bNkz5HNVKWtKtwEQFx1JbnYy1x1/KHk5qeRmJ+syJCIiIqgYFJFWKjLCuPP0I+iSFMe9/1rK+ooqxl2cR1KcioDWYHt1DQuKyslf5RV/+avK2FS5E4COCTHkdUtl9LAcBuek0rtrB6J1cXgREZFvUTEoIq2WmXHNcYfQJSmWWyYXcM5jM3nm0iF01QXDW5zyyp3MWV26+8hfQVE5O6prATg4LZ6Te3f2hnzmpJJzUHvMNCxYRERkX1QMikird0ZuBmmJ7bj6We9ahM9cNpjDOncIO5bshXOOorJt5K8qJb+wjPzCMpaVbAYgKsLom5nE6GHdyMtJJa9bCgcltAs5sYiISMuk2URFpM1YUlzBJU9/SuX2Gh6/eBDDD+kYdiQBdlTXsqS4grmry/xhn2Wsq6gCILFdFAO7pTDYP+rXPzOZuBid+ykiItIQXVpCRKQeazdt45KnPqXwq63cd05/Rg3ICDtSm1Ncvo25qzcxd3UZn63exIK1e4Z8dkmKZXBOKoNzUhjULZVenRN1rT8REZHvSZeWEBGpR0ZyHJOvHs6YZ/O58YV5FJdXceXRB6vgaCJVO2tYsLacuavL/AJw0+6jfjFREfTL8IZ85manMCArWedzioiIBEhHBkWkTdpeXcNNkwp4fX4x7WMi6dM1iX6ZSfTLSqZ/ZhLZqZqE5PtyzrHqq0rmrtlT+C0prqC61vuc6XZQe3KzksnNTiE3O5nDOncgJkqzfIqIiBxogQwTNbNCYDNQA1Q75/LMLBV4EcgBCoFznXNl5u1VPQScClQClzjnPvN/z2jgf/xf+wfn3Hh//SDgGSAOeAO40e0jsIpBEfmuamsdry8o3j075eLiit3DFZPbR9M3I4n+mcn0zfTudWHyr9tctZOCNf5RvzXesM8y//IO8TGR9M9KJjc7mdysFAZkJ9NRE72IiIgEIshiMM85t7HOur8Apc65e8zsViDFOfdrMzsVuB6vGDwSeMg5d6RfPOYDeYAD5gCD/ALyU+AG4BO8YvBh59ybDWVSMSgi+2tnTS3L1m1mflE584s2UVBUzvKSzdT4R7Y6JbajX6Z35LBfVjL9MpJIiY8JOXUwamsdKzZs+dpwz+XrN7PrI6RHpwSv8POP+vXopHP9REREwhLmOYOjgOP85fHA+8Cv/fUT/CN7s8ws2cy6+NvOcM6VApjZDGCkmb
0PdHDOzfLXTwDOABosBkVE9ld0ZAR9MpLok5HEhUdmA7BtRw2Li8v9ArGcgqJNvL2kZPdzslLj9hSImcn0yUgioV3LPx27dOsO5tUZ7lmwZhObt1cD3lHT3KxkTuvXhdzsZPplJpMUFx1yYhEREfm+GrvH4oDpZuaAx51z44B051yx//N1QLq/nAGsqfPcIn9dQ+uL6lkvIhKYuJhIBnVLZVC31N3rKqp2srConIKichas3cS81Zt4fb7X7ZnBoWkJu4eW9stM4vAuHYiNbl6XQ3DOUbmjhs1V1Wyu2kn5tp0sLq7YPctn4VeVAERGGId1TmRUblcGZqeQm52ii7qLiIi0Eo0tBo9yzq01s07ADDNbWveHzjnnF4pNyszGAGMAsrOzm7o5EWnjOsRGM/zQjgw/dM91Cjdu2c4C/8jh/KJy/r18Ay9/thaA6EijV+fE3UcQ+2Yk0zM9gajI/Zs8pbbWsWVHNZurqtniF3Obq6qp8O8311m3ue667Xseb9levXv4a11pie0YmJ3M+UOyyc3yzpdsH9Pyj3SKiIjItzXqE945t9a/X29mrwBDgBIz6+KcK/aHga73N18LZNV5eqa/bi17hpXuWv++vz6znu3ryzEOGAfeOYON+TeJiOyPjgntOP6wThx/WCfAO/JWXF61+9zD+UWbeLXgS57/ZDUAsdERHOHPYNqnaxIREdQp5PZSzO0q5HZUs6/TvSMjjMTYKO/WLprE2CgykuNIjE3csz42+mv3PdMT6ZoUq6N+IiIibcR+F4NmFg9EOOc2+8sjgLuAacBo4B7/fqr/lGnAdWb2At4EMuV+wfgW8CczS/G3GwH8xjlXamYVZjYUbwKZi4G/7W9eEZEgmRldk+PomhzHyD5dAO+I3qrSSq9AXOMViBM/XU3VztqvPTcmMoLE2CgS6hRz3Q5qv7to61BPIZfwjfVx0ZEq6kRERKRBjTkymA684u9sRAHPO+f+ZWazgUlmdjmwCjjX3/4NvJlEV+BdWuJSAL/ouxuY7W93167JZIBr2XNpiTfR5DEi0oJFRBjdO8bTvWM8owZ4p0BX19RS+FUlEcbuQq65nV8oIiIirZMuOi8iIiIiItKKfNdLS+zf7AUiIiIiIiLSoqkYFBERERERaYNUDIqIiIiIiLRBKgZFRERERETaIBWDIiIiIiIibZCKQRERERERkTZIxaCIiIiIiEgbpGJQRERERESkDVIxKCIiIiIi0gapGBQREREREWmDzDkXdoYDysw2AKvCzlGPjsBGZVAGZWh2GaB55FAGZVAGZVCG5p8BmkcOZVCGfenmnEvb10atrhhsrsws3zmXpwzKoAzNK0NzyaEMyqAMyqAMzT9Dc8mhDMpwoGiYqIiIiIiISBukYlBERERERKQNUjEYnHFhB0AZdlEGjzLs0RxyKINHGTzK4FEGjzJ4mkMGaB45lMGjDI2kcwZFRERERETaIB0ZFBERERERaYNUDDYxMxtpZsvMbIWZ3RpShqfMbL2ZLQyjfT9Dlpm9Z2aLzWyRmd0YQoZYM/vUzAr8DHcGnaFOlkgzm2tmr4XUfqGZLTCzeWaWH1KGZDObbGZLzWyJmQ0LuP1e/r9/163CzH4eZAY/xy/89+NCM5toZrEhZLjRb39RkK9BfX2TmaWa2Qwz+9y/Twkhwzn+a1FrZk0+Q9xeMvzV/78x38xeMbPkEDLc7bc/z8ymm1nXoDPU+dnNZubMrGPQGczs92a2tk5fcWrQGfz11/vviUVm9pegM5jZi3Veg0IzmxdChgFmNmvXZ5eZDQkhQ38z+9j/DH3VzDo0cYZ695+C7CsbyBBYX9lAhsD6ygYyBNpXHnDOOd2a6AZEAiuBg4EYoADoHUKOY4CBwMIQX4suwEB/ORFYHvRrARiQ4C9HA58AQ0N6PW4CngdeC6n9QqBjWO8HP8N44Ap/OQZIDjFLJLAO75o8QbabAXwBxPmPJwGXBJyhD7AQaA9EAW8DhwbU9rf6JuAvwK
3+8q3AvSFkOBzoBbwP5IX0OowAovzle0N6HTrUWb4BeCzoDP76LOAtvGsIN2m/tZfX4ffAL5v6fbCPDMf7/zfb+Y87hfG3qPPz+4HbQ3gdpgOn+MunAu+HkGE2cKy/fBlwdxNnqHf/Kci+soEMgfWVDWQIrK9sIEOgfeWBvunIYNMaAqxwzv3XObcDeAEYFXQI59y/gdKg2/1GhmLn3Gf+8mZgCd6OcJAZnHNui/8w2r8FftKsmWUCpwF/D7rt5sLMkvA+ZJ8EcM7tcM5tCjHSCcBK59yqENqOAuLMLAqvIPsy4PYPBz5xzlU656qBD4Czgmh4L33TKLwvCvDvzwg6g3NuiXNuWVO2+x0yTPf/HgCzgMwQMlTUeRhPE/eXDXxW/S/wq6Zufx8ZArOXDNcA9zjntvvbrA8hAwBmZsC5wMQQMjhg15G4JJq4v9xLhp7Av/3lGcDZTZxhb/tPgfWVe8sQZF/ZQIbA+soGMgTaVx5oKgabVgawps7jIgIugJojM8sBcvGOzAXddqQ/tGU9MMM5F3gG4EG8HZvaENrexQHTzWyOmY0Jof3uwAbgafOGy/7dzOJDyLHL+TTxjk19nHNrgfuA1UAxUO6cmx5wjIXA0WZ2kJm1x/u2PSvgDHWlO+eK/eV1QHqIWZqLy4A3w2jYzP5oZmuAi4DbQ2h/FLDWOVcQdNvfcJ0/DOypph66vBc98f6ffmJmH5jZ4BAy7HI0UOKc+zyEtn8O/NV/T94H/CaEDIvY88X+OQTYX35j/ymUvjLMfbjvkCGwvvKbGcLuKxtDxaAEyswSgCnAz7/xTUognHM1zrkBeN8cDTGzPkG2b2Y/AtY75+YE2W49jnLODQROAf6fmR0TcPtReENvxjrncoGteMNcAmdmMcDpwEshtJ2Ct1PRHegKxJvZT4PM4Jxbgje0ZjrwL2AeUBNkhr1x3pibFvUN64FmZrcB1cBzYbTvnLvNOZflt39dkG37X078lvB3rMYChwAD8L60uT+EDFFAKjAUuAWY5B+hC8MFhPDlme8a4Bf+e/IX+KNLAnYZcK2ZzcEbKrgjiEYb2n8Kqq8Mex+uoQxB9pX1ZQizr2wsFYNNay1f/8Yo01/XJplZNN5/nueccy+HmcUfkvgeMDLgpn8AnG5mhXjDhn9oZv8IOMOuI1K7hhq9gjekOUhFQFGdI7OT8YrDMJwCfOacKwmh7ROBL5xzG5xzO4GXgeFBh3DOPemcG+ScOwYowzsPIiwlZtYFwL9v0uFwzZmZXQL8CLjI39kL03M08XC4ehyC90VJgd9nZgKfmVnnIEM450r8LxJrgScIvr8Er8982T/d4VO8kSVNOplOffzh7GcBLwbdtm80Xj8J3hd4gf8tnHNLnXMjnHOD8IrilU3d5l72nwLtK5vDPtzeMgTZV36H1yGMvrJRVAw2rdlADzPr7h99OB+YFnKmUPjfYD4JLHHOPRBShrRds0yZWRxwErA0yAzOud845zKdczl474d3nXOBHgkys3gzS9y1jHfydaAzzTrn1gFrzKyXv+oEYHGQGeoI81vu1cBQM2vv/x85Ae8chECZWSf/PhtvR+/5oDPUMQ1vhw//fmqIWUJjZiPxhpOf7pyrDClDjzoPRxF8f7nAOdfJOZfj95lFeJM3rAsyx64dbt+ZBNxf+v6JN4kMZtYTb9KtjSHkOBFY6pwrCqFt8M4RPNZf/iEQ+FDVOv1lBPA/wGNN3N7e9p8C6yubyT5cvRmC7CsbyBBqX9loLuAZa9raDe/8m+V43xzdFlKGiXhDW3bifZheHkKGo/CGMMzHG4Y2Dzg14Az9gLl+hoU08Uxo3yHPcYQwmyje7LYF/m1RiO/LAUC+//f4J5ASQoZ44CsgKcT3wZ14HxwLgWfxZwsMOMOHeMV4AXBCgO1+q28CDgLewdvJextIDSHDmf7ydqAEeCuEDCvwzjnf1V829Uye9WWY4r8v5wOv4k2UEGiGb/y8kKafTbS+1+FZYIH/Ok
wDuoSQIQb4h//3+Az4YRh/C+AZ4OqmbHsfr8NRwBy/r/oEGBRChhvx9uuWA/cA1sQZ6t1/CrKvbCBDYH1lAxkC6ysbyBBoX3mgb+b/40RERERERKQN0TBRERERERGRNkjFoIiIiIiISBukYlBERERERKQNUjEoIiIiIiLSBqkYFBERERERaYNUDIqIiIiIiLRBKgZFRERERETaIBWDIiIiIiIibdD/B2aGV/hDOFS7AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# 画个图表\n", + "hourcount = (group_by_hour_pandas_df.to_dict())['count']\n", + "index = list(range(0,24))\n", + "cols = []\n", + "for i in index:\n", + " if i not in hourcount:\n", + " cols.append(0)\n", + " else:\n", + " cols.append(hourcount[i])\n", + "\n", + "group_by_hour_pandas_df = pd.DataFrame({'num': cols})\n", + "\n", + "\n", + "group_by_hour_pandas_df.plot(title='vas project-access count by hour', kind='line', figsize=(15, 5), xticks=group_by_hour_pandas_df.index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 例4. 每个月访问量" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 116 ms, sys: 0 ns, total: 116 ms\n", + "Wall time: 10.3 s\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA4MAAAFgCAYAAAAB2Kz/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAGuNJREFUeJzt3XuQXmWdJ/DvjwSMCHIJKXYg7HR2ZVR0wEsKGVlHAUfjaG3cWi94geiysrXqMrvu1oj6h7XjZbCcVcRSqyhBoYoSkbGUGhlZStFa7wZxZBAvKY0SvGUSvK2FgDz7Rx/YNtPpNJx03rz9fD5Vb+Wc59x+XcWvwzfnOeet1loAAADoywGTLgAAAIB9TxgEAADokDAIAADQIWEQAACgQ8IgAABAh4RBAACADgmDAAAAHRIGAQAAOiQMAgAAdEgYBAAA6NDKSRewtx111FFtZmZm0mUAAABMxI033vhPrbU1e9pv2YXBmZmZbN68edJlAAAATERV/WAx+5kmCgAA0CFhEAAAoEPCIAAAQIeW3TODAABAP+6+++5s27Ytd95556RL2edWrVqVtWvX5sADD3xQxwuDAADA1Nq2bVsOPfTQzMzMpKomXc4+01rLjh07sm3btqxbt+5BncM0UQAAYGrdeeedWb16dVdBMEmqKqtXrx51R1QYBAAAplpvQfA+Y39uYRAAAGA/deGFF+Y3v/nNkpzbM4MAAMCyMXP+J/bq+bZe8Oy9er4H6sILL8xLX/rSHHzwwXv93O4MAgAAjHD55ZfnxBNPzEknnZSzzjorW7duzemnn54TTzwxZ5xxRn74wx8mSV72spfl6quvvv+4Qw45JEnymc98Jk972tPyvOc9L4961KPykpe8JK21XHTRRfnRj36U0047Laeddtper9udQQBgQXv7X9nZuyZ91wJ6d8stt+TNb35zvvCFL+Soo47Kzp07s2nTpvs/l156ac4777x87GMfW/A8N910U2655ZYcc8wxOfXUU/P5z38+55
13Xt7xjnfkhhtuyFFHHbXXa3dnEAAA4EH69Kc/nec///n3h7UjjzwyX/ziF/PiF784SXLWWWflc5/73B7Pc/LJJ2ft2rU54IAD8rjHPS5bt25dyrKTCIMAAAD7xMqVK3PvvfcmSe69997cdddd9297yEMecv/yihUrcs899yx5PcIgAADAg3T66afnIx/5SHbs2JEk2blzZ5785CfnyiuvTJJcccUVecpTnpIkmZmZyY033pgkueaaa3L33Xfv8fyHHnpofvWrXy1J7Z4ZBAAAeJAe85jH5A1veEOe+tSnZsWKFXn84x+fd7/73Xn5y1+et7/97VmzZk0+8IEPJEle8YpXZOPGjTnppJOyYcOGPOxhD9vj+c8999xs2LAhxxxzTG644Ya9Wnu11vbqCSdt/fr1bfPmzZMuAwCWDS+Q2b95gQy9u/XWW/PoRz960mVMzHw/f1Xd2Fpbv6djTRMFAADokDAIAADQIWEQAACgQ8IgAAAw1Zbbe1AWa+zPLQwCAABTa9WqVdmxY0d3gbC1lh07dmTVqlUP+hy+WgIAAJhaa9euzbZt27J9+/ZJl7LPrVq1KmvXrn3QxwuDAADA1DrwwAOzbt26SZcxlUwTBQAA6JAwCAAA0CFhEAAAoEPCIAAAQIeEQQAAgA4JgwAAAB0SBgEAADokDAIAAHRIGAQAAOiQMAgAANAhYRAAAKBDwiAAAECHFhUGq+q/VdUtVfWPVfWhqlpVVeuq6stVtaWqPlxVBw37PmRY3zJsn5lzntcN49+uqmfOGd8wjG2pqvPnjM97DQAAAMbZYxisqmOTnJdkfWvtsUlWJDkzyduSvLO19ogkdyQ5ZzjknCR3DOPvHPZLVZ0wHPeYJBuSvLeqVlTViiTvSfKsJCckedGwbxa4BgAAACMsdproyiQPraqVSQ5O8uMkpye5eth+WZLnDssbh/UM28+oqhrGr2yt/ba19v0kW5KcPHy2tNa+11q7K8mVSTYOx+zuGgAAAIywxzDYWrs9yd8k+WFmQ+AvktyY5OettXuG3bYlOXZYPjbJbcOx9wz7r547vssxuxtfvcA1AAAAGGEx00SPyOxdvXVJjknysMxO89xvVNW5VbW5qjZv37590uUAAADs9xYzTfTpSb7fWtveWrs7yUeTnJrk8GHaaJKsTXL7sHx7kuOSZNh+WJIdc8d3OWZ34zsWuMbvaa1d3Fpb31pbv2bNmkX8SAAAAH1bTBj8YZJTqurg4Tm+M5J8M8kNSZ437LMpyceH5WuG9QzbP91aa8P4mcPbRtclOT7JV5J8Ncnxw5tDD8rsS2auGY7Z3TUAAAAYYTHPDH45sy9x+VqSm4djLk7y2iSvqaotmX2+75LhkEuSrB7GX5Pk/OE8tyS5KrNB8pNJXtVa+93wTOCrk1yX5NYkVw37ZoFrAAAAMELN3oBbPtavX982b9486TIAYNmYOf8Tky6BBWy94NmTLgHYz1TVja219Xvab7FfLQEAAMAyIgwCAAB0SBgEAADokDAIAADQIWEQAACgQ8IgAABAh4RBAACADgmDAAAAHRIGAQAAOiQMAgAAdEgYBAAA6JAwCAAA0CFhEAAAoEPCIAAAQIeEQQAAgA4JgwAAAB0SBgEAADokDAIAAHRIGAQAAOiQMAgAANAhYRAAAKBDwiAAAECHhEEAAIAOCYMAAAAdEgYBAAA6JAwCAAB0SBgEAADokDAIAADQIWEQAACgQ8IgAABAh4RBAACADgmDAAAAHRIGAQAAOiQMAgAAdEgYBAAA6JAwCAAA0CFhEAAAoEPCIAAAQIeEQQAAgA4JgwAAAB0SBgEAADokDAIAAHRIGAQAAOiQMAgAANAhYRAAAKBDwiAAAECHhEEAAIAOCYMAAAAdEgYBAAA6tKgwWFWHV9XVVfWtqrq1qv6kqo6squur6rvDn0cM+1ZVXVRVW6rqG1X1hDnn2TTs/92q2jRn/IlVdfNwzEVVVcP4vNcAAABgnMXeGXxXkk+21h
6V5KQktyY5P8mnWmvHJ/nUsJ4kz0py/PA5N8n7ktlgl+SNSZ6U5OQkb5wT7t6X5BVzjtswjO/uGgAAAIywxzBYVYcl+dMklyRJa+2u1trPk2xMctmw22VJnjssb0xyeZv1pSSHV9UfJHlmkutbaztba3ckuT7JhmHbw1trX2qttSSX73Ku+a4BAADACIu5M7guyfYkH6iqm6rq/VX1sCRHt9Z+POzzkyRHD8vHJrltzvHbhrGFxrfNM54FrvF7qurcqtpcVZu3b9++iB8JAACgb4sJgyuTPCHJ+1prj0/yf7PLdM3hjl7b++Ut7hqttYtba+tba+vXrFmzlGUAAAAsC4sJg9uSbGutfXlYvzqz4fCnwxTPDH/+bNh+e5Lj5hy/dhhbaHztPONZ4BoAAACMsMcw2Fr7SZLbquqRw9AZSb6Z5Jok970RdFOSjw/L1yQ5e3ir6ClJfjFM9bwuyTOq6ojhxTHPSHLdsO2XVXXK8BbRs3c513zXAAAAYISVi9zvvyS5oqoOSvK9JC/PbJC8qqrOSfKDJC8Y9r02yZ8n2ZLkN8O+aa3trKo3JfnqsN9ftdZ2DsuvTPLBJA9N8vfDJ0ku2M01AAAAGGFRYbC19vUk6+fZdMY8+7Ykr9rNeS5Ncuk845uTPHae8R3zXQMAAIBxFvs9gwAAACwji50mCjCVZs7/xKRLYA+2XvDsSZcAAF1yZxAAAKBDwiAAAECHhEEAAIAOCYMAAAAdEgYBAAA6JAwCAAB0SBgEAADokDAIAADQIWEQAACgQ8IgAABAh4RBAACADgmDAAAAHRIGAQAAOiQMAgAAdEgYBAAA6JAwCAAA0CFhEAAAoEPCIAAAQIeEQQAAgA4JgwAAAB0SBgEAADokDAIAAHRIGAQAAOiQMAgAANAhYRAAAKBDwiAAAECHhEEAAIAOCYMAAAAdEgYBAAA6JAwCAAB0SBgEAADokDAIAADQIWEQAACgQ8IgAABAh4RBAACADgmDAAAAHRIGAQAAOiQMAgAAdEgYBAAA6JAwCAAA0CFhEAAAoEPCIAAAQIeEQQAAgA4JgwAAAB0SBgEAADokDAIAAHRo0WGwqlZU1U1V9XfD+rqq+nJVbamqD1fVQcP4Q4b1LcP2mTnneN0w/u2qeuac8Q3D2JaqOn/O+LzXAAAAYJwHcmfwL5LcOmf9bUne2Vp7RJI7kpwzjJ+T5I5h/J3DfqmqE5KcmeQxSTYkee8QMFckeU+SZyU5IcmLhn0XugYAAAAjLCoMVtXaJM9O8v5hvZKcnuTqYZfLkjx3WN44rGfYfsaw/8YkV7bWftta+36SLUlOHj5bWmvfa63dleTKJBv3cA0AAABGWOydwQuT/GWSe4f11Ul+3lq7Z1jfluTYYfnYJLclybD9F8P+94/vcszuxhe6BgAAACPsMQxW1XOS/Ky1duM+qOdBqapzq2pzVW3evn37pMsBAADY7y3mzuCpSf5tVW3N7BTO05O8K8nhVbVy2GdtktuH5duTHJckw/bDkuyYO77LMbsb37HANX5Pa+3i1tr61tr6NWvWLOJHAgAA6Nsew2Br7XWttbWttZnMvgDm0621lyS5Icnzht02Jfn4sHzNsJ5h+6dba20YP3N42+i6JMcn+UqSryY5fnhz6EHDNa4ZjtndNQAAABhhzPcMvjbJa6pqS2af77tkGL8kyeph/DVJzk+S1totSa5K8s0kn0zyqtba74ZnAl+d5LrMvq30qmHfha4BAADACCv3vMv/11r7TJLPDMvfy+ybQHfd584kz9/N8W9J8pZ5xq9Ncu084/NeAwAAgHHG3BkEAABgSgmDAAAAHRIGAQAAOiQMAgAAdEgYBAAA6JAwCAAA0CFhEAAAoEPCIAAAQIeEQQAAgA4JgwAAAB1aOekCAABgOZs5/xOTLoE92HrBsyddwkS4MwgAANAhYRAAAKBDwiAAAECHhEEAAIAOCYMAAAAdEgYBAAA6JAwCAAB0SBgEAADokDAIAADQIW
EQAACgQ8IgAABAh4RBAACADgmDAAAAHRIGAQAAOiQMAgAAdEgYBAAA6JAwCAAA0CFhEAAAoEPCIAAAQIeEQQAAgA4JgwAAAB0SBgEAADokDAIAAHRIGAQAAOiQMAgAANAhYRAAAKBDKyddAAubOf8Tky6BPdh6wbMnXQIAADxg7gwCAAB0SBgEAADokDAIAADQIWEQAACgQ8IgAABAh4RBAACADgmDAAAAHRIGAQAAOiQMAgAAdEgYBAAA6JAwCAAA0KE9hsGqOq6qbqiqb1bVLVX1F8P4kVV1fVV9d/jziGG8quqiqtpSVd+oqifMOdemYf/vVtWmOeNPrKqbh2Muqqpa6BoAAACMs5g7g/ck+e+ttROSnJLkVVV1QpLzk3yqtXZ8kk8N60nyrCTHD59zk7wvmQ12Sd6Y5ElJTk7yxjnh7n1JXjHnuA3D+O6uAQAAwAh7DIOttR+31r42LP8qya1Jjk2yMcllw26XJXnusLwxyeVt1peSHF5Vf5DkmUmub63tbK3dkeT6JBuGbQ9vrX2ptdaSXL7Luea7BgAAACM8oGcGq2omyeOTfDnJ0a21Hw+bfpLk6GH52CS3zTls2zC20Pi2ecazwDUAAAAYYdFhsKoOSfK3Sf5ra+2Xc7cNd/TaXq7t9yx0jao6t6o2V9Xm7du3L2UZAAAAy8KiwmBVHZjZIHhFa+2jw/BPhymeGf782TB+e5Lj5hy+dhhbaHztPOMLXeP3tNYubq2tb62tX7NmzWJ+JAAAgK4t5m2ileSSJLe21t4xZ9M1Se57I+imJB+fM3728FbRU5L8YpjqeV2SZ1TVEcOLY56R5Lph2y+r6pThWmfvcq75rgEAAMAIKxexz6lJzkpyc1V9fRh7fZILklxVVeck+UGSFwzbrk3y50m2JPlNkpcnSWttZ1W9KclXh/3+qrW2c1h+ZZIPJnlokr8fPlngGgAAAIywxzDYWvtcktrN5jPm2b8ledVuznVpkkvnGd+c5LHzjO+Y7xoAAACM84DeJgoAAMDyIAwCAAB0SBgEAADokDAIAADQIWEQAACgQ8IgAABAh4RBAACADgmDAAAAHRIGAQAAOiQMAgAAdEgYBAAA6JAwCAAA0CFhEAAAoEPCIAAAQIeEQQAAgA4JgwAAAB0SBgEAADokDAIAAHRIGAQAAOiQMAgAANAhYRAAAKBDwiAAAECHhEEAAIAOCYMAAAAdEgYBAAA6JAwCAAB0SBgEAADokDAIAADQIWEQAACgQ8IgAABAh4RBAACADgmDAAAAHRIGAQAAOiQMAgAAdEgYBAAA6JAwCAAA0CFhEAAAoEPCIAAAQIeEQQAAgA4JgwAAAB0SBgEAADokDAIAAHRIGAQAAOiQMAgAANAhYRAAAKBDwiAAAECHhEEAAIAOCYMAAAAd2u/DYFVtqKpvV9WWqjp/0vUAAAAsB/t1GKyqFUnek+RZSU5I8qKqOmGyVQEAAEy//ToMJjk5yZbW2vdaa3cluTLJxgnXBAAAMPX29zB4bJLb5qxvG8YAAAAYYeWkC9gbqurcJOcOq7+uqm9Psh4WdFSSf5p0EXtTvW3SFdChZdVHeogJ0EMwzrLqoWRZ9tEfLman/T0M3p7kuDnra4ex39NauzjJxfuqKB68qtrcWls/6TpgmukjGEcPwTh6aPnY36eJfjXJ8VW1rqoOSnJmkmsmXBMAAMDU26/vDLbW7qmqVye5LsmKJJe21m6ZcFkAAABTb78Og0nSWrs2ybWTroO9xnReGE8fwTh6CMbRQ8tEtdYmXQMAAAD72P7+zCAAAABLQBgEAADokDAIAADQIWEQAACgQ/v920SZXlX1jiR/21r7/KRrgWlUVUcmeXWSHyW5JMnrk/xJkluTvLW1dscEy4OpUVWnJfn3SY5L8rsk30ny/tbalokWBlOgqlYmOSfJv0tyzDB8e5KPJ7mktXb3pGpjPG8TZclU1fYkP0iyJsmHk3yotXbTZKuC6VFV1ya5OcnDkzx6WL
4qyZ8lOam1tnGC5cFUqKq/TvIvknwqyXOTfD+zYfCVmf1HlY9MsDzY71XVh5L8PMllSbYNw2uTbEpyZGvthZOqjfGEQZZMVd3UWnt8Vf1RkhcmOTPJiiQfymww/M5EC4T9XFV9vbX2uKqqJNtaa8fuum2C5cFUqKqbW2t/PCyvTPLZ1tqpVXVEkv/TWnvsZCuE/VtVfae19kcPdBvTwTODLKWWJK2177TW3tRae0ySFyRZleTaiVYG0+GA4X9Yj0tySFXNJElVrU5y0ATrgmly7zDlOpmd4rYiSYZp1jWxqmB67Kyq51fV/bmhqg6oqhcm8bjClPPMIEvpn/0l21r7RpJvJHndvi8Hps5fJ/nWsPwfkrx/9iZhHp3kf06qKJgyb01yU1V9J8kjk/znJKmqNUn+YZKFwZQ4M8nbkry3qu4Lf4cnuWHYxhQzTZQlU1WHtNZ+Pek6YJpV1YrM/q6+Z5ji9rgkt7fWfjzh0mBqDHcG/1WSLa21n0+6HphWw8yUtNZ2TLoW9g53BlkyrbVfV9VhSTYkue9Zp9uTXOcvY1i0Q5JsqKq5PeQNiPDA/C7Jv07yp8PddX8XwYOwawisqj9rrV0/qXoYzzODLJmqOjvJ15I8LcnBw+e0JDcO24AF6CEYTx/Bkrpk0gUwjmmiLJmq+naSJ+36L6/DCzG+7O1TsDA9BOPpIxinqq7Z3aYkp7fWHrYv62HvMk2UpVQZ3ii6i3vjDW6wGHoIxtNHMM5Tkrw0ya7vgagkJ+/7ctibhEGW0luSfK2q/neS24axf5nZL8x+08Sqgumhh2A8fQTjfCnJb1prn911w3DnnSlmmihLapiG88z88xfI+F4aWAQ9BOPpI4D5CYPsU1X1nNba3026DphWegjG00cwjh5aPoRB9qmq+lpr7QmTrgOmlR6C8fQRjKOHlg9fLcG+5mF9GEcPwXj6CMbRQ8uEMMi+9p8mXQBMOT0E4+kjGEcPLRPCIEumqg6qqrOr6unD+ouTnF1Vr6qqAydcHuz39BCMp49gHD20vHlmkCVTVVdk9utLDk7y8ySHJPlokjMy+9/epgmWB/s9PQTj6SMYRw8tb8IgS6aqvtFaO7GqVmb2Nd7HtNZ+V1WV5B9aaydOuETYr+khGE8fwTh6aHkzTZSldEBVHZTk0Mz+a9Jhw/hDkphWAHumh2A8fQTj6KFlbOWkC2BZuyTJt5KsSPKGJB+pqu8lOSXJlZMsDKaEHoLx9BGMo4eWMdNEWVJVdUyStNZ+VFWHJ3l6kh+21r4y2cpgOughGE8fwTh6aPkSBpmIqjqktfbrSdcB00oPwXj6CMbRQ9PPM4NMyjcnXQBMOT0E4+kjGEcPTTnPDLJkquo1u9uU2dcSAwvQQzCePoJx9NDy5s4gS+mtSY7I7Nun5n4Oif/2YDH0EIynj2AcPbSMuTPIUvpako+11m7cdUNV/ccJ1APTRg/BePoIxtFDy5gXyLBkquqRSXa21rbPs+3o1tpPJ1AWTA09BOPpIxhHDy1vwiAAAECHzPNlyVTVYVV1QVV9q6p2VtWOqrp1GDt80vXB/k4PwXj6CMbRQ8ubMMhSuirJHUme1lo7srW2Oslpw9hVE60MpoMegvH0EYyjh5Yx00RZMlX17dbaIx/oNmCWHoLx9BGMo4eWN3cGWUo/qKq/rKqj7xuoqqOr6rVJbptgXTAt9BCMp49gHD20jAmDLKUXJlmd5LNVdUdV7UzymSRHJnnBJAuDKaGHYDx9BOPooWXMNFGWVFU9KsnaJF9qrf16zviG1tonJ1cZTAc9BOPpIxhHDy1f7gyyZKrqvCQfT/LqJP9YVRvnbH7rZKqC6aGHYDx9BOPooeVt5aQLYFl7RZInttZ+XVUzSa6uqpnW2ruS1EQrg+mgh2A8fQTj6KFlTBhkKR1w31SC1trWqnpaZn+B/GH88oDF0EMwnj6CcfTQMmaaKEvpp1X1uPtWhl8kz0lyVJI/nlhVMD30EIynj2AcPbSMeY
EMS6aq1ia5p7X2k3m2ndpa+/wEyoKpoYdgPH0E4+ih5U0YBAAA6JBpogAAAB0SBgEAADokDAIAAHRIGAQAAOiQMAgAI1TVTFV9q6o+WFXfqaorqurpVfX5qvpuVZ086RoBYD7CIACM94gk/yvJo4bPi5P8myT/I8nrJ1gXAOyWMAgA432/tXZza+3eJLck+VSb/e6mm5PMTLQyANgNYRAAxvvtnOV756zfm2Tlvi8HAPZMGAQAAOiQMAgAANChmn2kAQAAgJ64MwgAANAhYRAAAKBDwiAAAECHhEEAAIAOCYMAAAAdEgYBAAA6JAwCAAB0SBgEAADo0P8D9Ev2dZlARwIAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "%%time\n", + "group_by_month =df.select('month').groupBy('month').count()\n", + "\n", + "group_by_month_pandas = group_by_month.limit(5).toPandas()\n", + "group_by_month_pandas.index = group_by_month_pandas['month']\n", + "group_by_month_pandas.index.name = 'm';\n", + "group_by_month_pandas = group_by_month_pandas.sort_values(by='month', ascending=True)\n", + "\n", + "## 每月uv\n", + "group_by_month_pandas[['count']].plot(kind='bar', figsize=(15, 5))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/language/Go.md b/docs/language/Go.md index c1031a0..7d7602e 100644 --- a/docs/language/Go.md +++ b/docs/language/Go.md @@ -256,6 +256,75 @@ channel收发遵循FIFO原则,其底层是hchan结构指针,创建通道使 - slice - channel +### 为啥 channel 会有 close 这个操作, 在哪些场景下会用到这个操作 ? + +在 Go 语言中,channel 的 close 操作用于向 channel 的接收方明确地通知发送操作已经完成。关闭一个 channel 可以表达“没有更多的数据将被发送到这个 channel”这一信号。这是一种控制信号,帮助接收方理解数据流的生命周期,并且可以避免在 channel 上进行无限等待。 + +#### 使用 close 的场景 + +1. **通知多个接收者完成处理**: + + 当使用一个 channel 来分发任务或数据给多个协程(goroutines)时,关闭 channel 是一种告知所有接收者没有更多数据要处理的有效方法。接收者可以通过检测 channel 是否已关闭来适时停止处理。 + +2. **控制循环退出**: + + 在接收数据时,可以使用 for range 循环从 channel 接收数据。当 channel 被关闭,并且 channel 中已经没有待处理的数据时,for range 循环会自动结束。这使得编码简洁,并且逻辑清晰。 + +3. **防止资源泄露**: + + 如果不关闭不再使用的 channel,可能会导致内存资源没有得到释放,特别是在 channel 还保持着一些数据项的情况下。尽管 Go 的垃圾回收机制会回收未引用的对象,但显式关闭 channel 是一个好的实践,它可以清晰地表达程序设计者的意图。 + +4. 
**配合 select 实现优雅退出**: + + 在使用 select 语句处理多个 channel 的时候,关闭一个 channel 会让 select 中监听该 channel 的接收 case 立即就绪。特别是在一些需要优雅退出的并发模式中,关闭 channel 可以促使 select 快速响应并处理结束逻辑。 + +#### 示例:数据处理和广播信号 + +假设有一个数据处理任务,需要将数据分批发送到多个处理协程,处理完成后再汇总结果。这里可以使用关闭 channel 的方式来告知所有处理协程,数据已经发送完毕: + +```go +func processData(dataChunks [][]int) []int { + var results []int + var wg sync.WaitGroup + resultChan := make(chan int) + dataChan := make(chan int, 100) + + // 启动多个工作协程 + for i := 0; i < 5; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for data := range dataChan { + resultChan <- process(data) // 假设有一个处理函数 + } + }() + } + // 发送数据 + go func() { + for _, chunk := range dataChunks { + for _, data := range chunk { + dataChan <- data + } + } + close(dataChan) + }() + // 所有工作协程退出后,由发送方一侧关闭 resultChan + go func() { + wg.Wait() + close(resultChan) + }() + // 在当前协程中接收结果,直到 resultChan 被关闭 + for result := range resultChan { + results = append(results, result) + } + return results +} +``` + +在这个示例中,通过关闭 dataChan 来告知工作协程不会再有新的数据发送,这时协程可以结束从 channel 接收数据的操作。所有工作协程退出后(通过 sync.WaitGroup 等待)再关闭 resultChan,用来表示所有结果已经处理完毕,接收结果的 for range 循环随之结束,可以进行后续步骤。 + +总结来说,关闭一个 channel 是一种向接收方传递完成信号的方法,它在多协程协作的环境中尤为有用,有助于提高代码的可读性和安全性。 + ## Go如何避免内存的对象频繁分配和回收的问题? 可以考虑使用对象缓存池sync.Pool @@ -877,6 +946,101 @@ func As(err error, target interface{}) bool // 判断err是否为target类型 [一篇文章带你轻松搞懂Golang的error处理_Golang_脚本之家](https://www.jb51.net/article/254917.htm) +## Go错误处理机制为啥不采用Java的try-catch的异常机制? 
+ +Go 语言选择不使用 Java 中的异常机制,而是采用返回错误(error)的方式处理异常情况,这主要是基于几个设计目标和考虑: + +- **简明性和可预测性**:在 Go 语言中,错误被视为常见的、可预料的结果,而不是异常情况。通过显式地返回错误,程序员被迫处理错误,这可以使得错误处理更加显式和清晰,减少了忽视错误处理的可能性。 + +- **控制流简单化**:使用异常机制时,代码的执行流可以因为异常的抛出而在任何点被中断,这使得程序的控制流变得复杂和难以跟踪。相反,通过返回错误,错误处理和正常逻辑清晰地分离,控制流更加直观。 + +- **性能考虑**:异常处理机制通常涉及栈展开(stack unwinding)过程,这在性能上可能带来开销。Go 语言通过简单的值返回避免了这种开销。 + +- **跨语言调用简化**:Go 语言在设计时考虑到了与 C 语言的互操作性。在 C 语言中没有异常机制,使用错误码是常见做法。Go 的错误处理方式与之兼容,简化了跨语言调用的复杂性。 + +关于是否是一个好的设计,这取决于具体的使用场景和个人偏好。这种设计强化了错误处理的重要性,并鼓励开发者更加关注程序可能出现的错误情况,从而可能增强程序的健壮性。但是,这也可能导致代码中充满大量的错误处理逻辑,有时可能显得冗余。 + +Java 的异常机制有其优点,特别是在错误需要跨多个调用层级传播时,异常提供了一种非常便捷的方式。然而,它也有一些常见的问题: + +- **异常被忽略**:Java 中的异常可以被捕获后不做任何处理,这可能隐藏程序中的错误。 +- **代码复杂性**:异常的抛出和捕获可能使得程序逻辑变得难以理解和维护,特别是当异常可以在多个层级被抛出和捕获时。 +- **性能问题**:异常处理可能引入性能损耗,特别是在异常频繁抛出的场景中。 + +总体来说,Go 的错误处理方式倾向于更加显式和可控,而 Java 的异常机制则提供了一种灵活但有时可能不够清晰的错误处理策略。选择哪种方式更好,很大程度上取决于特定项目的需求和开发团队的偏好。 + +## Go为什么采用组合机制而不是使用继承机制? + +Go 语言没有传统意义上的类和继承机制,这是其设计哲学中的一部分,旨在简化语言和避免常见的面向对象编程的一些复杂性和问题。下面是 Go 语言这样设计的一些原因及其替代方式: + +### 为什么 Go 没有传统的继承? + +- **简化语言设计**:Go 的设计哲学是保持语言的简洁和高效。继承是一个强大但复杂的功能,可以导致多种编程问题,如复杂的依赖关系和难以预测的行为。 + +- **避免继承带来的问题**: + - **脆弱的基类问题**:基类的改变可能影响到大量的派生类。 + - **深层继承结构导致的复杂性**:随着继承链的增长,理解和维护代码变得更加困难。 + - **多重继承的复杂性**:如 C++ 中的多重继承可能导致菱形继承问题,增加了语言和编译器的复杂性。 + +### Go 如何实现多态? + +尽管 Go 没有继承,它通过接口来支持多态性。在 Go 中,接口是一组方法签名的集合,任何类型只要实现了这些方法,就被认为实现了该接口。这种方式与继承不同,更加灵活和简洁: + +- **接口隐式实现**:类型不需要声明它实现了哪个接口,这降低了代码之间的耦合。 +- **组合优于继承**:Go 通过组合(有时候通过嵌入结构体)来实现代码的复用,这比继承更加直接和清晰。 + +### Embedded Struct 算不算继承? + +Embedded struct(嵌入结构体)在 Go 中被用作实现类似继承的功能,但它更准确地被描述为组合。通过嵌入一个结构体,一个新的结构体可以直接访问嵌入结构体的方法和字段,这提供了一种方式来复用代码: + +- **不是真正的继承**:虽然看起来类似,嵌入结构体并不提供传统意义上的多态。 +- **代码复用和扩展**:它允许一种灵活的方式来扩展功能,而无需继承的复杂性。 + +### 传统继承的问题 + +- **过度耦合**:子类和父类之间的关系过于紧密,改动父类可能会影响所有子类。 +- **隐藏的复杂性**:继承可以导致代码的行为不透明,增加理解和调试的难度。 +- **难以正确使用**:正确地设计和维护一个继承体系需要大量的设计经验和技术洞察力。 + +Go 的设计选择鼓励开发者采用更简单、更易于理解和维护的编程范式。通过接口和组合,Go 提供了一种强大的工具集来建构灵活且可维护的代码结构,避免了许多传统面向对象编程中常见的陷阱。 + +## Go 中 channel 跟 Java 中 BlockingQueue 又有啥区别 ? 
+ +Go 的 channel 和 Java 的 BlockingQueue 都是用于不同线程或协程间的通信机制,但它们的设计哲学和使用场景有所不同。这两种机制都用于解决并发编程中的同步问题,但具体的实现和适用的场景有差异。 + +### Channel 与 BlockingQueue 的区别 + +1. 设计哲学: + - Go 的 Channel:Channel 是 Go 语言中的一等公民,用于在协程(goroutines)之间进行通信。它遵循“通过通信来共享内存,而不是通过共享内存来通信”的哲学。 + - Java 的 BlockingQueue:是 Java 并发包中的一部分,主要用于线程间的通信,尤其在生产者-消费者模型中。它依赖于共享内存和锁来实现线程安全。 + +2. 功能实现: + - Channel 支持多种模式,如无缓冲、有缓冲通道,可以非常灵活地控制协程间的数据流和同步。 + - BlockingQueue 是一个接口,Java 提供了多种实现(如 ArrayBlockingQueue, LinkedBlockingQueue),主要通过阻塞操作来实现生产者和消费者之间的同步。 + +3. 用途和应用场景: + - Channel 通常用于协程间的信号传递和数据交换,特别是在需要控制并发操作顺序时。 + - BlockingQueue 通常用于处理较大的数据流或者在多线程环境下缓存数据。 + +### 共享内存并发 vs. Channel 并发 +#### 共享内存并发 + +- 适用场景:适合复杂的数据结构共享,或者当有多个线程需要访问和修改同一数据时。在多核处理器上,这种方式可以有效利用缓存一致性协议。 +- 优点:可以实现细粒度的控制,对于某些高性能计算场景可以更直接地管理内存。 +- 缺点:容易产生竞态条件,编程模型更加复杂,需要精确地控制锁和同步。 + +#### Channel 并发 + +- 适用场景:适合事件驱动或消息驱动的应用,如网络服务或并行数据处理。在这些场景中,通信模式清晰,各部分之间的解耦更彻底。 +- 优点:简化了并发和同步的管理,代码通常更易于理解和维护。 +- 缺点:在极端的高性能需求下,可能会因为消息传递的开销而不如直接的内存访问高效。 + +#### 选择建议 + +- 如果问题适合通过明确的消息传递进行模块化设计,或者当系统的可维护性和清晰的并发模型比原始性能更重要时,使用 Channel。 +- 如果需要最大限度地控制性能,并且可以管理更复杂的同步策略和竞态风险,使用共享内存可能更合适。 + +在实际开发中,选择合适的并发策略依赖于具体问题、性能需求和团队的熟悉度。对于维护性和开发效率有较高要求的项目,Channel 往往是一个更易于管理的选择。 + ## 资料 [【Golang开发面经】蔚来(两轮技术面)](https://zhuanlan.zhihu.com/p/574580955) \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 696980f..34c9994 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -36,8 +36,14 @@ nav: - 镜像: container/image.md - cgroup: container/cgroup.md - namespace: container/namespace.md - - 语言: + - 开发语言: - Go: language/Go.md + - Jupyter: + - Go-Frameworks-Github-Fork-Stats: jupyter/Go-Frameworks-Github-Fork-Stats.ipynb + - Pandas完全指南: jupyter/Pandas完全指南.ipynb + - Spark上手示例: + - jupyter/Spark上手示例1:RDD操作.ipynb + - jupyter/Spark上手示例2:DataFrame操作.ipynb - QA: - redis: qa/redis.md - mysql: qa/mysql.md @@ -48,7 +54,7 @@ nav: - 消息队列: qa/queue.md - IO: qa/io.md - protobuf: qa/protobuf.md - - go: qa/go.md + - go: qa/go.md - 分布式: qa/dist.md - Elasticsearch: qa/es.md - docker: qa/docker.md @@ 
-118,7 +124,12 @@ theme: plugins: - search: - separator: '[\s\u200b\-]' + separator: '[\s\u200b\-_,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])' + - git-revision-date-localized: + type: iso_date + - glightbox + - mkdocs-jupyter + - print-site # Additional configuration extra: