{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Premiers pas en *pandas*\n", "\n", "***\n", "> __Auteur__: Joseph Salmon\n", "> , adapté en français du travail de Joris Van den Bossche:\n", "https://github.com/jorisvandenbossche/pandas-tutorial/blob/master/01-pandas_introduction.ipynb" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "# Création d'un sommaire si besoin\n", "\n", "# \n", "# ## Sommaire\n", "\n", "# * __[Introduction et présentation](#intro)__
" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ " \n", "\n", "## Introduction et présentation" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "pd.options.display.max_rows = 8" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "%matplotlib notebook" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Cas 1: Survie sur le Titanic " ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Replace is False and data exists, so doing nothing. Use replace==True to re-download the data.\n" ] } ], "source": [ "from download import download\n", "\n", "url = \"http://josephsalmon.eu/enseignement/datasets/titanic.csv\"\n", "path_target = \"./titanic.csv\"\n", "download(url, path_target, replace=False)\n", "\n", "# df est souvent utilisé comme acronyme de \"data frame\"\n", "df_titanic_raw = pd.read_csv(\"titanic.csv\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
.......................................
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S
7803Palsson, Master. Gosta Leonardmale2.03134990921.0750NaNS
8913Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female27.00234774211.1333NaNS
91012Nasser, Mrs. Nicholas (Adele Achem)female14.01023773630.0708NaNC
\n", "

10 rows × 12 columns

\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", ".. ... ... ... \n", "6 7 0 1 \n", "7 8 0 3 \n", "8 9 1 3 \n", "9 10 1 2 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", ".. ... ... ... ... \n", "6 McCarthy, Mr. Timothy J male 54.0 0 \n", "7 Palsson, Master. Gosta Leonard male 2.0 3 \n", "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n", "9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", ".. ... ... ... ... ... \n", "6 0 17463 51.8625 E46 S \n", "7 1 349909 21.0750 NaN S \n", "8 2 347742 11.1333 NaN S \n", "9 0 237736 30.0708 NaN C \n", "\n", "[10 rows x 12 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic_raw.head(n=10)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Valeurs manquantes (en: missing values)\n", "Pour faciliter la suite on ne garde que les observations qui sont complètes, on enlève donc ici les valeurs manquantes" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S
101113Sandstrom, Miss. Marguerite Rutfemale4.011PP 954916.7000G6S
111211Bonnell, Miss. Elizabethfemale58.00011378326.5500C103S
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "1 2 1 1 \n", "3 4 1 1 \n", "6 7 0 1 \n", "10 11 1 3 \n", "11 12 1 1 \n", "\n", " Name Sex Age SibSp \\\n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "6 McCarthy, Mr. Timothy J male 54.0 0 \n", "10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n", "11 Bonnell, Miss. Elizabeth female 58.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "1 0 PC 17599 71.2833 C85 C \n", "3 0 113803 53.1000 C123 S \n", "6 0 17463 51.8625 E46 S \n", "10 1 PP 9549 16.7000 G6 S \n", "11 0 113783 26.5500 C103 S " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic = df_titanic_raw.dropna()\n", "df_titanic.head()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Description succinte des variables:\n", "- Survival - Survie (0 = Non; 1 = Oui).\n", "- Pclass - Passenger Class / Classe du passager (1 = 1ere; 2 = 2nde; 3 = 3ème)\n", "- Name - Nom\n", "- Sex - Sexe\n", "- Age - Age\n", "- Sibsp - Nombre de frères / soeurs / maris / épouses à bord\n", "- Parch - Nombre de parents ascendants / enfants à bord\n", "- Ticket - Numéro du ticket\n", "- Fare - Prix du ticket (British pound)\n", "- Cabin - Cabine\n", "- Embarked - Port d'embarquation (C = Cherbourg; Q = Queenstown; S = Southampton)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Commandes de statistiques descriptives:\n", "- count - effectif\n", "- mean - moyenne\n", "- std (**st**andard **d**eviation - écart-type)\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassAgeSibSpParchFare
count183.000000183.000000183.000000183.000000183.000000183.000000183.000000
mean455.3661200.6721311.19125735.6744260.4644810.47541078.682469
std247.0524760.4707250.51518715.6438660.6441590.75461776.347843
min2.0000000.0000001.0000000.9200000.0000000.0000000.000000
25%263.5000000.0000001.00000024.0000000.0000000.00000029.700000
50%457.0000001.0000001.00000036.0000000.0000000.00000057.000000
75%676.0000001.0000001.00000047.5000001.0000001.00000090.000000
max890.0000001.0000003.00000080.0000003.0000004.000000512.329200
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass Age SibSp \\\n", "count 183.000000 183.000000 183.000000 183.000000 183.000000 \n", "mean 455.366120 0.672131 1.191257 35.674426 0.464481 \n", "std 247.052476 0.470725 0.515187 15.643866 0.644159 \n", "min 2.000000 0.000000 1.000000 0.920000 0.000000 \n", "25% 263.500000 0.000000 1.000000 24.000000 0.000000 \n", "50% 457.000000 1.000000 1.000000 36.000000 0.000000 \n", "75% 676.000000 1.000000 1.000000 47.500000 1.000000 \n", "max 890.000000 1.000000 3.000000 80.000000 3.000000 \n", "\n", " Parch Fare \n", "count 183.000000 183.000000 \n", "mean 0.475410 78.682469 \n", "std 0.754617 76.347843 \n", "min 0.000000 0.000000 \n", "25% 0.000000 29.700000 \n", "50% 0.000000 57.000000 \n", "75% 1.000000 90.000000 \n", "max 4.000000 512.329200 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.describe()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Compréhension et visualisation de la base de données:" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "**Quelle est la répartition par âge des passagers?**" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "application/javascript": [ "/* Put everything inside the global mpl namespace */\n", "window.mpl = {};\n", "\n", "\n", "mpl.get_websocket_type = function() {\n", " if (typeof(WebSocket) !== 'undefined') {\n", " return WebSocket;\n", " } else if (typeof(MozWebSocket) !== 'undefined') {\n", " return MozWebSocket;\n", " } else {\n", " alert('Your browser does not have WebSocket support.' +\n", " 'Please try Chrome, Safari or Firefox ≥ 6. 
' +\n", " 'Firefox 4 and 5 are also supported but you ' +\n", " 'have to enable WebSockets in about:config.');\n", " };\n", "}\n", "\n", "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", " this.id = figure_id;\n", "\n", " this.ws = websocket;\n", "\n", " this.supports_binary = (this.ws.binaryType != undefined);\n", "\n", " if (!this.supports_binary) {\n", " var warnings = document.getElementById(\"mpl-warnings\");\n", " if (warnings) {\n", " warnings.style.display = 'block';\n", " warnings.textContent = (\n", " \"This browser does not support binary websocket messages. \" +\n", " \"Performance may be slow.\");\n", " }\n", " }\n", "\n", " this.imageObj = new Image();\n", "\n", " this.context = undefined;\n", " this.message = undefined;\n", " this.canvas = undefined;\n", " this.rubberband_canvas = undefined;\n", " this.rubberband_context = undefined;\n", " this.format_dropdown = undefined;\n", "\n", " this.image_mode = 'full';\n", "\n", " this.root = $('
');\n", " this._root_extra_style(this.root)\n", " this.root.attr('style', 'display: inline-block');\n", "\n", " $(parent_element).append(this.root);\n", "\n", " this._init_header(this);\n", " this._init_canvas(this);\n", " this._init_toolbar(this);\n", "\n", " var fig = this;\n", "\n", " this.waiting = false;\n", "\n", " this.ws.onopen = function () {\n", " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", " fig.send_message(\"send_image_mode\", {});\n", " if (mpl.ratio != 1) {\n", " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", " }\n", " fig.send_message(\"refresh\", {});\n", " }\n", "\n", " this.imageObj.onload = function() {\n", " if (fig.image_mode == 'full') {\n", " // Full images could contain transparency (where diff images\n", " // almost always do), so we need to clear the canvas so that\n", " // there is no ghosting.\n", " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", " }\n", " fig.context.drawImage(fig.imageObj, 0, 0);\n", " };\n", "\n", " this.imageObj.onunload = function() {\n", " fig.ws.close();\n", " }\n", "\n", " this.ws.onmessage = this._make_on_message_function(this);\n", "\n", " this.ondownload = ondownload;\n", "}\n", "\n", "mpl.figure.prototype._init_header = function() {\n", " var titlebar = $(\n", " '
');\n", " var titletext = $(\n", " '
');\n", " titlebar.append(titletext)\n", " this.root.append(titlebar);\n", " this.header = titletext[0];\n", "}\n", "\n", "\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "\n", "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "mpl.figure.prototype._init_canvas = function() {\n", " var fig = this;\n", "\n", " var canvas_div = $('
');\n", "\n", " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", "\n", " function canvas_keyboard_event(event) {\n", " return fig.key_event(event, event['data']);\n", " }\n", "\n", " canvas_div.keydown('key_press', canvas_keyboard_event);\n", " canvas_div.keyup('key_release', canvas_keyboard_event);\n", " this.canvas_div = canvas_div\n", " this._canvas_extra_style(canvas_div)\n", " this.root.append(canvas_div);\n", "\n", " var canvas = $('');\n", " canvas.addClass('mpl-canvas');\n", " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", "\n", " this.canvas = canvas[0];\n", " this.context = canvas[0].getContext(\"2d\");\n", "\n", " var backingStore = this.context.backingStorePixelRatio ||\n", "\tthis.context.webkitBackingStorePixelRatio ||\n", "\tthis.context.mozBackingStorePixelRatio ||\n", "\tthis.context.msBackingStorePixelRatio ||\n", "\tthis.context.oBackingStorePixelRatio ||\n", "\tthis.context.backingStorePixelRatio || 1;\n", "\n", " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", "\n", " var rubberband = $('');\n", " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", "\n", " var pass_mouse_events = true;\n", "\n", " canvas_div.resizable({\n", " start: function(event, ui) {\n", " pass_mouse_events = false;\n", " },\n", " resize: function(event, ui) {\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " stop: function(event, ui) {\n", " pass_mouse_events = true;\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " });\n", "\n", " function mouse_event_fn(event) {\n", " if (pass_mouse_events)\n", " return fig.mouse_event(event, event['data']);\n", " }\n", "\n", " rubberband.mousedown('button_press', mouse_event_fn);\n", " rubberband.mouseup('button_release', mouse_event_fn);\n", " // Throttle sequential mouse events to 1 every 20ms.\n", " rubberband.mousemove('motion_notify', mouse_event_fn);\n", "\n", " 
rubberband.mouseenter('figure_enter', mouse_event_fn);\n", " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", "\n", " canvas_div.on(\"wheel\", function (event) {\n", " event = event.originalEvent;\n", " event['data'] = 'scroll'\n", " if (event.deltaY < 0) {\n", " event.step = 1;\n", " } else {\n", " event.step = -1;\n", " }\n", " mouse_event_fn(event);\n", " });\n", "\n", " canvas_div.append(canvas);\n", " canvas_div.append(rubberband);\n", "\n", " this.rubberband = rubberband;\n", " this.rubberband_canvas = rubberband[0];\n", " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", " this.rubberband_context.strokeStyle = \"#000000\";\n", "\n", " this._resize_canvas = function(width, height) {\n", " // Keep the size of the canvas, canvas container, and rubber band\n", " // canvas in synch.\n", " canvas_div.css('width', width)\n", " canvas_div.css('height', height)\n", "\n", " canvas.attr('width', width * mpl.ratio);\n", " canvas.attr('height', height * mpl.ratio);\n", " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", "\n", " rubberband.attr('width', width);\n", " rubberband.attr('height', height);\n", " }\n", "\n", " // Set the figure to an initial 600x600px, this will subsequently be updated\n", " // upon first draw.\n", " this._resize_canvas(600, 600);\n", "\n", " // Disable right mouse context menu.\n", " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", " return false;\n", " });\n", "\n", " function set_focus () {\n", " canvas.focus();\n", " canvas_div.focus();\n", " }\n", "\n", " window.setTimeout(set_focus, 100);\n", "}\n", "\n", "mpl.figure.prototype._init_toolbar = function() {\n", " var fig = this;\n", "\n", " var nav_element = $('
')\n", " nav_element.attr('style', 'width: 100%');\n", " this.root.append(nav_element);\n", "\n", " // Define a callback function for later on.\n", " function toolbar_event(event) {\n", " return fig.toolbar_button_onclick(event['data']);\n", " }\n", " function toolbar_mouse_event(event) {\n", " return fig.toolbar_button_onmouseover(event['data']);\n", " }\n", "\n", " for(var toolbar_ind in mpl.toolbar_items) {\n", " var name = mpl.toolbar_items[toolbar_ind][0];\n", " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", " var image = mpl.toolbar_items[toolbar_ind][2];\n", " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", "\n", " if (!name) {\n", " // put a spacer in here.\n", " continue;\n", " }\n", " var button = $('');\n", " button.click(method_name, toolbar_event);\n", " button.mouseover(tooltip, toolbar_mouse_event);\n", " nav_element.append(button);\n", " }\n", "\n", " // Add the status bar.\n", " var status_bar = $('');\n", " nav_element.append(status_bar);\n", " this.message = status_bar[0];\n", "\n", " // Add the close button to the window.\n", " var buttongrp = $('
');\n", " var button = $('');\n", " button.click(function (evt) { fig.handle_close(fig, {}); } );\n", " button.mouseover('Stop Interaction', toolbar_mouse_event);\n", " buttongrp.append(button);\n", " var titlebar = this.root.find($('.ui-dialog-titlebar'));\n", " titlebar.prepend(buttongrp);\n", "}\n", "\n", "mpl.figure.prototype._root_extra_style = function(el){\n", " var fig = this\n", " el.on(\"remove\", function(){\n", "\tfig.close_ws(fig, {});\n", " });\n", "}\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(el){\n", " // this is important to make the div 'focusable\n", " el.attr('tabindex', 0)\n", " // reach out to IPython and tell the keyboard manager to turn it's self\n", " // off when our div gets focus\n", "\n", " // location in version 3\n", " if (IPython.notebook.keyboard_manager) {\n", " IPython.notebook.keyboard_manager.register_events(el);\n", " }\n", " else {\n", " // location in version 2\n", " IPython.keyboard_manager.register_events(el);\n", " }\n", "\n", "}\n", "\n", "mpl.figure.prototype._key_event_extra = function(event, name) {\n", " var manager = IPython.notebook.keyboard_manager;\n", " if (!manager)\n", " manager = IPython.keyboard_manager;\n", "\n", " // Check for shift+enter\n", " if (event.shiftKey && event.which == 13) {\n", " this.canvas_div.blur();\n", " event.shiftKey = false;\n", " // Send a \"J\" for go to next cell\n", " event.which = 74;\n", " event.keyCode = 74;\n", " manager.command_mode();\n", " manager.handle_keydown(event);\n", " }\n", "}\n", "\n", "mpl.figure.prototype.handle_save = function(fig, msg) {\n", " fig.ondownload(fig, null);\n", "}\n", "\n", "\n", "mpl.find_output_cell = function(html_output) {\n", " // Return the cell and output element which can be found *uniquely* in the notebook.\n", " // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n", " // IPython event is triggered only after the cells have been serialised, which for\n", " // our purposes (turning an 
active figure into a static one), is too late.\n", " var cells = IPython.notebook.get_cells();\n", " var ncells = cells.length;\n", " for (var i=0; i= 3 moved mimebundle to data attribute of output\n", " data = data.data;\n", " }\n", " if (data['text/html'] == html_output) {\n", " return [cell, data, j];\n", " }\n", " }\n", " }\n", " }\n", "}\n", "\n", "// Register the function which deals with the matplotlib target/channel.\n", "// The kernel may be null if the page has been refreshed.\n", "if (IPython.notebook.kernel != null) {\n", " IPython.notebook.kernel.comm_manager.register_target('matplotlib', mpl.mpl_figure_comm);\n", "}\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "Text(0.5, 1.0, \"Estimation de la densité de l'âge des passagers\")" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plt.figure()\n", "ax = sns.kdeplot(df_titanic['Age'], shade=True)\n", "plt.xlabel('Proportion')\n", "plt.ylabel('Age')\n", "ax.legend().set_visible(False)\n", "plt.title(\"Estimation de la densité de l'âge des passagers\")" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "**Comment le taux de survie des passagers diffère-t-il entre les sexes?**" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Survived
Sex
female0.931818
male0.431579
\n", "
" ], "text/plain": [ " Survived\n", "Sex \n", "female 0.931818\n", "male 0.431579" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.groupby('Sex')[['Survived']].aggregate(lambda x: x.sum() / len(x))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "**Comment le taux de survie des passagers diffère-t-il entre les différentes classes?**" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "application/javascript": [ "/* Put everything inside the global mpl namespace */\n", "window.mpl = {};\n", "\n", "\n", "mpl.get_websocket_type = function() {\n", " if (typeof(WebSocket) !== 'undefined') {\n", " return WebSocket;\n", " } else if (typeof(MozWebSocket) !== 'undefined') {\n", " return MozWebSocket;\n", " } else {\n", " alert('Your browser does not have WebSocket support.' +\n", " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", " 'Firefox 4 and 5 are also supported but you ' +\n", " 'have to enable WebSockets in about:config.');\n", " };\n", "}\n", "\n", "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", " this.id = figure_id;\n", "\n", " this.ws = websocket;\n", "\n", " this.supports_binary = (this.ws.binaryType != undefined);\n", "\n", " if (!this.supports_binary) {\n", " var warnings = document.getElementById(\"mpl-warnings\");\n", " if (warnings) {\n", " warnings.style.display = 'block';\n", " warnings.textContent = (\n", " \"This browser does not support binary websocket messages. 
\" +\n", " \"Performance may be slow.\");\n", " }\n", " }\n", "\n", " this.imageObj = new Image();\n", "\n", " this.context = undefined;\n", " this.message = undefined;\n", " this.canvas = undefined;\n", " this.rubberband_canvas = undefined;\n", " this.rubberband_context = undefined;\n", " this.format_dropdown = undefined;\n", "\n", " this.image_mode = 'full';\n", "\n", " this.root = $('
');\n", " this._root_extra_style(this.root)\n", " this.root.attr('style', 'display: inline-block');\n", "\n", " $(parent_element).append(this.root);\n", "\n", " this._init_header(this);\n", " this._init_canvas(this);\n", " this._init_toolbar(this);\n", "\n", " var fig = this;\n", "\n", " this.waiting = false;\n", "\n", " this.ws.onopen = function () {\n", " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", " fig.send_message(\"send_image_mode\", {});\n", " if (mpl.ratio != 1) {\n", " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", " }\n", " fig.send_message(\"refresh\", {});\n", " }\n", "\n", " this.imageObj.onload = function() {\n", " if (fig.image_mode == 'full') {\n", " // Full images could contain transparency (where diff images\n", " // almost always do), so we need to clear the canvas so that\n", " // there is no ghosting.\n", " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", " }\n", " fig.context.drawImage(fig.imageObj, 0, 0);\n", " };\n", "\n", " this.imageObj.onunload = function() {\n", " fig.ws.close();\n", " }\n", "\n", " this.ws.onmessage = this._make_on_message_function(this);\n", "\n", " this.ondownload = ondownload;\n", "}\n", "\n", "mpl.figure.prototype._init_header = function() {\n", " var titlebar = $(\n", " '
');\n", " var titletext = $(\n", " '
');\n", " titlebar.append(titletext)\n", " this.root.append(titlebar);\n", " this.header = titletext[0];\n", "}\n", "\n", "\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "\n", "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "mpl.figure.prototype._init_canvas = function() {\n", " var fig = this;\n", "\n", " var canvas_div = $('
');\n", "\n", " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", "\n", " function canvas_keyboard_event(event) {\n", " return fig.key_event(event, event['data']);\n", " }\n", "\n", " canvas_div.keydown('key_press', canvas_keyboard_event);\n", " canvas_div.keyup('key_release', canvas_keyboard_event);\n", " this.canvas_div = canvas_div\n", " this._canvas_extra_style(canvas_div)\n", " this.root.append(canvas_div);\n", "\n", " var canvas = $('');\n", " canvas.addClass('mpl-canvas');\n", " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", "\n", " this.canvas = canvas[0];\n", " this.context = canvas[0].getContext(\"2d\");\n", "\n", " var backingStore = this.context.backingStorePixelRatio ||\n", "\tthis.context.webkitBackingStorePixelRatio ||\n", "\tthis.context.mozBackingStorePixelRatio ||\n", "\tthis.context.msBackingStorePixelRatio ||\n", "\tthis.context.oBackingStorePixelRatio ||\n", "\tthis.context.backingStorePixelRatio || 1;\n", "\n", " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", "\n", " var rubberband = $('');\n", " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", "\n", " var pass_mouse_events = true;\n", "\n", " canvas_div.resizable({\n", " start: function(event, ui) {\n", " pass_mouse_events = false;\n", " },\n", " resize: function(event, ui) {\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " stop: function(event, ui) {\n", " pass_mouse_events = true;\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " });\n", "\n", " function mouse_event_fn(event) {\n", " if (pass_mouse_events)\n", " return fig.mouse_event(event, event['data']);\n", " }\n", "\n", " rubberband.mousedown('button_press', mouse_event_fn);\n", " rubberband.mouseup('button_release', mouse_event_fn);\n", " // Throttle sequential mouse events to 1 every 20ms.\n", " rubberband.mousemove('motion_notify', mouse_event_fn);\n", "\n", " 
rubberband.mouseenter('figure_enter', mouse_event_fn);\n", " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", "\n", " canvas_div.on(\"wheel\", function (event) {\n", " event = event.originalEvent;\n", " event['data'] = 'scroll'\n", " if (event.deltaY < 0) {\n", " event.step = 1;\n", " } else {\n", " event.step = -1;\n", " }\n", " mouse_event_fn(event);\n", " });\n", "\n", " canvas_div.append(canvas);\n", " canvas_div.append(rubberband);\n", "\n", " this.rubberband = rubberband;\n", " this.rubberband_canvas = rubberband[0];\n", " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", " this.rubberband_context.strokeStyle = \"#000000\";\n", "\n", " this._resize_canvas = function(width, height) {\n", " // Keep the size of the canvas, canvas container, and rubber band\n", " // canvas in synch.\n", " canvas_div.css('width', width)\n", " canvas_div.css('height', height)\n", "\n", " canvas.attr('width', width * mpl.ratio);\n", " canvas.attr('height', height * mpl.ratio);\n", " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", "\n", " rubberband.attr('width', width);\n", " rubberband.attr('height', height);\n", " }\n", "\n", " // Set the figure to an initial 600x600px, this will subsequently be updated\n", " // upon first draw.\n", " this._resize_canvas(600, 600);\n", "\n", " // Disable right mouse context menu.\n", " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", " return false;\n", " });\n", "\n", " function set_focus () {\n", " canvas.focus();\n", " canvas_div.focus();\n", " }\n", "\n", " window.setTimeout(set_focus, 100);\n", "}\n", "\n", "mpl.figure.prototype._init_toolbar = function() {\n", " var fig = this;\n", "\n", " var nav_element = $('
')\n", " nav_element.attr('style', 'width: 100%');\n", " this.root.append(nav_element);\n", "\n", " // Define a callback function for later on.\n", " function toolbar_event(event) {\n", " return fig.toolbar_button_onclick(event['data']);\n", " }\n", " function toolbar_mouse_event(event) {\n", " return fig.toolbar_button_onmouseover(event['data']);\n", " }\n", "\n", " for(var toolbar_ind in mpl.toolbar_items) {\n", " var name = mpl.toolbar_items[toolbar_ind][0];\n", " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", " var image = mpl.toolbar_items[toolbar_ind][2];\n", " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", "\n", " if (!name) {\n", " // put a spacer in here.\n", " continue;\n", " }\n", " var button = $('');\n", " button.click(method_name, toolbar_event);\n", " button.mouseover(tooltip, toolbar_mouse_event);\n", " nav_element.append(button);\n", " }\n", "\n", " // Add the status bar.\n", " var status_bar = $('');\n", " nav_element.append(status_bar);\n", " this.message = status_bar[0];\n", "\n", " // Add the close button to the window.\n", " var buttongrp = $('
');\n", " var button = $('');\n", " button.click(function (evt) { fig.handle_close(fig, {}); } );\n", " button.mouseover('Stop Interaction', toolbar_mouse_event);\n", " buttongrp.append(button);\n", " var titlebar = this.root.find($('.ui-dialog-titlebar'));\n", " titlebar.prepend(buttongrp);\n", "}\n", "\n", "mpl.figure.prototype._root_extra_style = function(el){\n", " var fig = this\n", " el.on(\"remove\", function(){\n", "\tfig.close_ws(fig, {});\n", " });\n", "}\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(el){\n", " // this is important to make the div 'focusable\n", " el.attr('tabindex', 0)\n", " // reach out to IPython and tell the keyboard manager to turn it's self\n", " // off when our div gets focus\n", "\n", " // location in version 3\n", " if (IPython.notebook.keyboard_manager) {\n", " IPython.notebook.keyboard_manager.register_events(el);\n", " }\n", " else {\n", " // location in version 2\n", " IPython.keyboard_manager.register_events(el);\n", " }\n", "\n", "}\n", "\n", "mpl.figure.prototype._key_event_extra = function(event, name) {\n", " var manager = IPython.notebook.keyboard_manager;\n", " if (!manager)\n", " manager = IPython.keyboard_manager;\n", "\n", " // Check for shift+enter\n", " if (event.shiftKey && event.which == 13) {\n", " this.canvas_div.blur();\n", " event.shiftKey = false;\n", " // Send a \"J\" for go to next cell\n", " event.which = 74;\n", " event.keyCode = 74;\n", " manager.command_mode();\n", " manager.handle_keydown(event);\n", " }\n", "}\n", "\n", "mpl.figure.prototype.handle_save = function(fig, msg) {\n", " fig.ondownload(fig, null);\n", "}\n", "\n", "\n", "mpl.find_output_cell = function(html_output) {\n", " // Return the cell and output element which can be found *uniquely* in the notebook.\n", " // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n", " // IPython event is triggered only after the cells have been serialised, which for\n", " // our purposes (turning an 
active figure into a static one), is too late.\n", " var cells = IPython.notebook.get_cells();\n", " var ncells = cells.length;\n", " for (var i=0; i= 3 moved mimebundle to data attribute of output\n", " data = data.data;\n", " }\n", " if (data['text/html'] == html_output) {\n", " return [cell, data, j];\n", " }\n", " }\n", " }\n", " }\n", "}\n", "\n", "// Register the function which deals with the matplotlib target/channel.\n", "// The kernel may be null if the page has been refreshed.\n", "if (IPython.notebook.kernel != null) {\n", " IPython.notebook.kernel.comm_manager.register_target('matplotlib', mpl.mpl_figure_comm);\n", "}\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, axes = plt.subplots(2, 1, figsize=(10, 4), sharex=True)\n", "\n", "axes[0].plot(polution_ts['O3'].resample('D').mean())\n", "axes[0].set_title(\"Polution à l'ozone: moyenne journalière sur Paris\")\n", "axes[0].set_ylabel(\"Concentration (µg/m³)\")\n", "\n", "axes[1].plot(polution_ts['NO2'].resample('D').mean())\n", "axes[1].set_title(\"Polution à l'azote: moyenne journalière sur Paris\")\n", "axes[1].set_ylabel(\"Concentration (µg/m³)\")\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "**La pollution atmosphérique montre-t-elle une tendance à la baisse au fil des ans?**" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "application/javascript": [ "/* Put everything inside the global mpl namespace */\n", "window.mpl = {};\n", "\n", "\n", "mpl.get_websocket_type = function() {\n", " if (typeof(WebSocket) !== 'undefined') {\n", " return WebSocket;\n", " } else if (typeof(MozWebSocket) !== 'undefined') {\n", " return MozWebSocket;\n", " } else {\n", " alert('Your browser does not 
have WebSocket support.' +\n", " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", " 'Firefox 4 and 5 are also supported but you ' +\n", " 'have to enable WebSockets in about:config.');\n", " };\n", "}\n", "\n", "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", " this.id = figure_id;\n", "\n", " this.ws = websocket;\n", "\n", " this.supports_binary = (this.ws.binaryType != undefined);\n", "\n", " if (!this.supports_binary) {\n", " var warnings = document.getElementById(\"mpl-warnings\");\n", " if (warnings) {\n", " warnings.style.display = 'block';\n", " warnings.textContent = (\n", " \"This browser does not support binary websocket messages. \" +\n", " \"Performance may be slow.\");\n", " }\n", " }\n", "\n", " this.imageObj = new Image();\n", "\n", " this.context = undefined;\n", " this.message = undefined;\n", " this.canvas = undefined;\n", " this.rubberband_canvas = undefined;\n", " this.rubberband_context = undefined;\n", " this.format_dropdown = undefined;\n", "\n", " this.image_mode = 'full';\n", "\n", " this.root = $('
');\n", " this._root_extra_style(this.root)\n", " this.root.attr('style', 'display: inline-block');\n", "\n", " $(parent_element).append(this.root);\n", "\n", " this._init_header(this);\n", " this._init_canvas(this);\n", " this._init_toolbar(this);\n", "\n", " var fig = this;\n", "\n", " this.waiting = false;\n", "\n", " this.ws.onopen = function () {\n", " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", " fig.send_message(\"send_image_mode\", {});\n", " if (mpl.ratio != 1) {\n", " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", " }\n", " fig.send_message(\"refresh\", {});\n", " }\n", "\n", " this.imageObj.onload = function() {\n", " if (fig.image_mode == 'full') {\n", " // Full images could contain transparency (where diff images\n", " // almost always do), so we need to clear the canvas so that\n", " // there is no ghosting.\n", " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", " }\n", " fig.context.drawImage(fig.imageObj, 0, 0);\n", " };\n", "\n", " this.imageObj.onunload = function() {\n", " fig.ws.close();\n", " }\n", "\n", " this.ws.onmessage = this._make_on_message_function(this);\n", "\n", " this.ondownload = ondownload;\n", "}\n", "\n", "mpl.figure.prototype._init_header = function() {\n", " var titlebar = $(\n", " '
');\n", " var titletext = $(\n", " '
');\n", " titlebar.append(titletext)\n", " this.root.append(titlebar);\n", " this.header = titletext[0];\n", "}\n", "\n", "\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "\n", "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "mpl.figure.prototype._init_canvas = function() {\n", " var fig = this;\n", "\n", " var canvas_div = $('
');\n", "\n", " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", "\n", " function canvas_keyboard_event(event) {\n", " return fig.key_event(event, event['data']);\n", " }\n", "\n", " canvas_div.keydown('key_press', canvas_keyboard_event);\n", " canvas_div.keyup('key_release', canvas_keyboard_event);\n", " this.canvas_div = canvas_div\n", " this._canvas_extra_style(canvas_div)\n", " this.root.append(canvas_div);\n", "\n", " var canvas = $('');\n", " canvas.addClass('mpl-canvas');\n", " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", "\n", " this.canvas = canvas[0];\n", " this.context = canvas[0].getContext(\"2d\");\n", "\n", " var backingStore = this.context.backingStorePixelRatio ||\n", "\tthis.context.webkitBackingStorePixelRatio ||\n", "\tthis.context.mozBackingStorePixelRatio ||\n", "\tthis.context.msBackingStorePixelRatio ||\n", "\tthis.context.oBackingStorePixelRatio ||\n", "\tthis.context.backingStorePixelRatio || 1;\n", "\n", " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", "\n", " var rubberband = $('');\n", " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", "\n", " var pass_mouse_events = true;\n", "\n", " canvas_div.resizable({\n", " start: function(event, ui) {\n", " pass_mouse_events = false;\n", " },\n", " resize: function(event, ui) {\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " stop: function(event, ui) {\n", " pass_mouse_events = true;\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " });\n", "\n", " function mouse_event_fn(event) {\n", " if (pass_mouse_events)\n", " return fig.mouse_event(event, event['data']);\n", " }\n", "\n", " rubberband.mousedown('button_press', mouse_event_fn);\n", " rubberband.mouseup('button_release', mouse_event_fn);\n", " // Throttle sequential mouse events to 1 every 20ms.\n", " rubberband.mousemove('motion_notify', mouse_event_fn);\n", "\n", " 
rubberband.mouseenter('figure_enter', mouse_event_fn);\n", " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", "\n", " canvas_div.on(\"wheel\", function (event) {\n", " event = event.originalEvent;\n", " event['data'] = 'scroll'\n", " if (event.deltaY < 0) {\n", " event.step = 1;\n", " } else {\n", " event.step = -1;\n", " }\n", " mouse_event_fn(event);\n", " });\n", "\n", " canvas_div.append(canvas);\n", " canvas_div.append(rubberband);\n", "\n", " this.rubberband = rubberband;\n", " this.rubberband_canvas = rubberband[0];\n", " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", " this.rubberband_context.strokeStyle = \"#000000\";\n", "\n", " this._resize_canvas = function(width, height) {\n", " // Keep the size of the canvas, canvas container, and rubber band\n", " // canvas in synch.\n", " canvas_div.css('width', width)\n", " canvas_div.css('height', height)\n", "\n", " canvas.attr('width', width * mpl.ratio);\n", " canvas.attr('height', height * mpl.ratio);\n", " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", "\n", " rubberband.attr('width', width);\n", " rubberband.attr('height', height);\n", " }\n", "\n", " // Set the figure to an initial 600x600px, this will subsequently be updated\n", " // upon first draw.\n", " this._resize_canvas(600, 600);\n", "\n", " // Disable right mouse context menu.\n", " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", " return false;\n", " });\n", "\n", " function set_focus () {\n", " canvas.focus();\n", " canvas_div.focus();\n", " }\n", "\n", " window.setTimeout(set_focus, 100);\n", "}\n", "\n", "mpl.figure.prototype._init_toolbar = function() {\n", " var fig = this;\n", "\n", " var nav_element = $('
')\n", " nav_element.attr('style', 'width: 100%');\n", " this.root.append(nav_element);\n", "\n", " // Define a callback function for later on.\n", " function toolbar_event(event) {\n", " return fig.toolbar_button_onclick(event['data']);\n", " }\n", " function toolbar_mouse_event(event) {\n", " return fig.toolbar_button_onmouseover(event['data']);\n", " }\n", "\n", " for(var toolbar_ind in mpl.toolbar_items) {\n", " var name = mpl.toolbar_items[toolbar_ind][0];\n", " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", " var image = mpl.toolbar_items[toolbar_ind][2];\n", " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", "\n", " if (!name) {\n", " // put a spacer in here.\n", " continue;\n", " }\n", " var button = $('');\n", " button.click(method_name, toolbar_event);\n", " button.mouseover(tooltip, toolbar_mouse_event);\n", " nav_element.append(button);\n", " }\n", "\n", " // Add the status bar.\n", " var status_bar = $('');\n", " nav_element.append(status_bar);\n", " this.message = status_bar[0];\n", "\n", " // Add the close button to the window.\n", " var buttongrp = $('
');\n", " var button = $('');\n", " button.click(function (evt) { fig.handle_close(fig, {}); } );\n", " button.mouseover('Stop Interaction', toolbar_mouse_event);\n", " buttongrp.append(button);\n", " var titlebar = this.root.find($('.ui-dialog-titlebar'));\n", " titlebar.prepend(buttongrp);\n", "}\n", "\n", "mpl.figure.prototype._root_extra_style = function(el){\n", " var fig = this\n", " el.on(\"remove\", function(){\n", "\tfig.close_ws(fig, {});\n", " });\n", "}\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(el){\n", " // this is important to make the div 'focusable\n", " el.attr('tabindex', 0)\n", " // reach out to IPython and tell the keyboard manager to turn it's self\n", " // off when our div gets focus\n", "\n", " // location in version 3\n", " if (IPython.notebook.keyboard_manager) {\n", " IPython.notebook.keyboard_manager.register_events(el);\n", " }\n", " else {\n", " // location in version 2\n", " IPython.keyboard_manager.register_events(el);\n", " }\n", "\n", "}\n", "\n", "mpl.figure.prototype._key_event_extra = function(event, name) {\n", " var manager = IPython.notebook.keyboard_manager;\n", " if (!manager)\n", " manager = IPython.keyboard_manager;\n", "\n", " // Check for shift+enter\n", " if (event.shiftKey && event.which == 13) {\n", " this.canvas_div.blur();\n", " event.shiftKey = false;\n", " // Send a \"J\" for go to next cell\n", " event.which = 74;\n", " event.keyCode = 74;\n", " manager.command_mode();\n", " manager.handle_keydown(event);\n", " }\n", "}\n", "\n", "mpl.figure.prototype.handle_save = function(fig, msg) {\n", " fig.ondownload(fig, null);\n", "}\n", "\n", "\n", "mpl.find_output_cell = function(html_output) {\n", " // Return the cell and output element which can be found *uniquely* in the notebook.\n", " // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n", " // IPython event is triggered only after the cells have been serialised, which for\n", " // our purposes (turning an 
active figure into a static one), is too late.\n", " var cells = IPython.notebook.get_cells();\n", " var ncells = cells.length;\n", " for (var i=0; i= 3 moved mimebundle to data attribute of output\n", " data = data.data;\n", " }\n", " if (data['text/html'] == html_output) {\n", " return [cell, data, j];\n", " }\n", " }\n", " }\n", " }\n", "}\n", "\n", "// Register the function which deals with the matplotlib target/channel.\n", "// The kernel may be null if the page has been refreshed.\n", "if (IPython.notebook.kernel != null) {\n", " IPython.notebook.kernel.comm_manager.register_target('matplotlib', mpl.mpl_figure_comm);\n", "}\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.set_palette(\"GnBu_d\", n_colors=7)\n", "polution_ts['weekday'] = polution_ts.index.weekday # Monday=0, Sunday=6\n", "\n", "# polution_ts['weekend'] = polution_ts['weekday'].isin([5, 6])\n", "\n", "days = ['Lundi','Mardi', 'Mercredi','Jeudi','Vendredi','Samedi','Dimanche'] \n", "\n", "polution_week_no2 = polution_ts.groupby(['weekday', polution_ts.index.hour])['NO2'].mean().unstack(level=0)\n", "polution_week_03 = polution_ts.groupby(['weekday', polution_ts.index.hour])['O3'].mean().unstack(level=0)\n", "\n", "\n", "fig, axes = plt.subplots(2, 1, figsize=(7,7), sharex=True)\n", "\n", "polution_week_no2.plot(ax=axes[0])\n", "axes[0].set_ylabel(\"Concentration (µg/m³)\")\n", "axes[0].set_xlabel(\"Heure de la journée\")\n", "axes[0].set_title(\"Profil journalier de la pollution au NO2: effet du weekend?\")\n", "axes[0].set_xticks(np.arange(0,24))\n", "axes[0].set_xticklabels(np.arange(0,24),rotation=45)\n", "axes[0].set_ylim(0,60)\n", "\n", "polution_week_03.plot(ax=axes[1])\n", "axes[1].set_ylabel(\"Concentration (µg/m³)\")\n", "axes[1].set_xlabel(\"Heure de la journée\")\n", "axes[1].set_title(\"Profil journalier de la pollution au O3: 
effet du weekend?\")\n", "axes[1].set_xticks(np.arange(0,24))\n", "axes[1].set_xticklabels(np.arange(0,24),rotation=45)\n", "axes[1].set_ylim(0,70)\n", "axes[1].legend().set_visible(False)\n", "# ax.legend()\n", "axes[0].legend(labels=days,loc='lower left', bbox_to_anchor=(1, 0.5))\n", "\n", "plt.tight_layout()" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "### EXERCICE: quid des saisons?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Pandas: analyser des données avec Python " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Pour les travaux intensifs en données en Python, la bibliothèque Pandas est devenue essentielle.\n", "\n", "Qu'est-ce que pandas? C'est un environnement qui gère des Data Frame:\n", "\n", "- Un *Data Frame* Pandas s'apparente à un tableau *numpy* muni d'étiquettes pour les lignes et les colonnes, et permet une prise en charge des types de données hétérogènes.\n", "- Pandas peut aussi être considéré comme le data.frame de R en Python.\n", "- Puissant pour travailler avec les données manquantes, travailler avec des données chronologiques, pour lire et écrire vos données, pour remodeler, regrouper, fusionner vos données, ...\n", "\n", "Documentation: http://pandas.pydata.org/pandas-docs/stable/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Quand a-t-on besoin de Pandas?\n", "Quand on travaille avec des tableaux ou des structures de données (comme des dataframe R, SQL table, Excel, Spreadsheet, ...):\n", "\n", "- Importer des données\n", "- Nettoyer des données \"sales\" \n", "- Explorer et comprendre des données\n", "- Traiter et préparer les données pour faire une analyse \n", "- Analyser les données (avec en plus scikit-learn, statsmodels,...)\n", "
\n", "
\n", "\n", "**ATTENTION / LIMITES:**\n", "\n", "Pandas est bon pour travailler avec des données hétérogènes et des tableaux 1D/2D, mais tous les types de données ne correspondent pas à ces structures!\n", "\n", "Contre-exemples:\n", "- Quand on travaille avec des données de type **array** (e.g. images): utiliser *numpy*\n", "- Pour des données multidimensionnelles étiquetées (e.g. données de climat): voir [xarray](http://xarray.pydata.org/en/stable/)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Les structures de données en pandas : DataFrame et Series" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Un DataFrame est une structure de données tabulaire (un objet multidimensionnel pouvant contenir des données étiquetées) composée de lignes et de colonnes, semblable à une feuille de calcul, une table de base de données ou à l'objet data.frame de R. Vous pouvez le considérer comme plusieurs objets Series partageant le même index." ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S
101113Sandstrom, Miss. Marguerite Rutfemale4.011PP 954916.7000G6S
.......................................
87287301Carlsson, Mr. Frans Olofmale33.0006955.0000B51 B53 B55S
87988011Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)female56.0011176783.1583C50C
88788811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42S
88989011Behr, Mr. Karl Howellmale26.00011136930.0000C148C
\n", "

183 rows × 12 columns

\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "1 2 1 1 \n", "3 4 1 1 \n", "6 7 0 1 \n", "10 11 1 3 \n", ".. ... ... ... \n", "872 873 0 1 \n", "879 880 1 1 \n", "887 888 1 1 \n", "889 890 1 1 \n", "\n", " Name Sex Age SibSp \\\n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "6 McCarthy, Mr. Timothy J male 54.0 0 \n", "10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n", ".. ... ... ... ... \n", "872 Carlsson, Mr. Frans Olof male 33.0 0 \n", "879 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 \n", "887 Graham, Miss. Margaret Edith female 19.0 0 \n", "889 Behr, Mr. Karl Howell male 26.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "1 0 PC 17599 71.2833 C85 C \n", "3 0 113803 53.1000 C123 S \n", "6 0 17463 51.8625 E46 S \n", "10 1 PP 9549 16.7000 G6 S \n", ".. ... ... ... ... ... \n", "872 0 695 5.0000 B51 B53 B55 S \n", "879 1 11767 83.1583 C50 C \n", "887 0 112053 30.0000 B42 S \n", "889 0 111369 30.0000 C148 C \n", "\n", "[183 rows x 12 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Int64Index([ 1, 3, 6, 10, 11, 21, 23, 27, 52, 54,\n", " ...\n", " 835, 853, 857, 862, 867, 871, 872, 879, 887, 889],\n", " dtype='int64', length=183)" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.index" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',\n", " 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],\n", " dtype='object')" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.columns" ] }, { "cell_type": "code", "execution_count": 21, 
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "PassengerId int64\n", "Survived int64\n", "Pclass int64\n", "Name object\n", " ... \n", "Ticket object\n", "Fare float64\n", "Cabin object\n", "Embarked object\n", "Length: 12, dtype: object" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.dtypes" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 183 entries, 1 to 889\n", "Data columns (total 12 columns):\n", "PassengerId 183 non-null int64\n", "Survived 183 non-null int64\n", "Pclass 183 non-null int64\n", "Name 183 non-null object\n", "Sex 183 non-null object\n", "Age 183 non-null float64\n", "SibSp 183 non-null int64\n", "Parch 183 non-null int64\n", "Ticket 183 non-null object\n", "Fare 183 non-null float64\n", "Cabin 183 non-null object\n", "Embarked 183 non-null object\n", "dtypes: float64(2), int64(5), object(5)\n", "memory usage: 23.6+ KB\n" ] } ], "source": [ "df_titanic.info()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 891 entries, 0 to 890\n", "Data columns (total 12 columns):\n", "PassengerId 891 non-null int64\n", "Survived 891 non-null int64\n", "Pclass 891 non-null int64\n", "Name 891 non-null object\n", "Sex 891 non-null object\n", "Age 714 non-null float64\n", "SibSp 891 non-null int64\n", "Parch 891 non-null int64\n", "Ticket 891 non-null object\n", "Fare 891 non-null float64\n", "Cabin 204 non-null object\n", "Embarked 889 non-null object\n", "dtypes: float64(2), int64(5), object(5)\n", "memory usage: 83.6+ KB\n" ] } ], "source": [ "df_titanic_raw.info() # on voit que c'est la variable cabine qui n'est pas bien renseigné, suit après l'âge" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[2, 1, 1, ..., 
71.2833, 'C85', 'C'],\n", " [4, 1, 1, ..., 53.1, 'C123', 'S'],\n", " [7, 0, 1, ..., 51.8625, 'E46', 'S'],\n", " ...,\n", " [880, 1, 1, ..., 83.1583, 'C50', 'C'],\n", " [888, 1, 1, ..., 30.0, 'B42', 'S'],\n", " [890, 1, 1, ..., 30.0, 'C148', 'C']], dtype=object)" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.values # c'est la liste de valeur /array associé" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Données unidimensionnelles : Series (une colonne d'un DataFrame)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Une Series est un support de base pour les données étiquetées unidimensionnelles." ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "fare = df_titanic['Fare']" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 71.2833\n", "3 53.1000\n", "6 51.8625\n", "10 16.7000\n", " ... \n", "872 5.0000\n", "879 83.1583\n", "887 30.0000\n", "889 30.0000\n", "Name: Fare, Length: 183, dtype: float64" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fare" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Attributs de l'objet *Series*: indices et valeurs" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Int64Index([ 1, 3, 6, 10, 11, 21, 23, 27, 52, 54,\n", " ...\n", " 835, 853, 857, 862, 867, 871, 872, 879, 887, 889],\n", " dtype='int64', length=183)" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fare.index" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 71.2833, 53.1 , 51.8625, 16.7 , 26.55 , 13. ,\n", " 35.5 , 263. 
, 76.7292, 61.9792])" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fare.values[:10]" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "51.8625" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fare[6] # existe mais fare[0] provoque une erreur, car on l'a enlevé du dataFrame, comme valeur manquante.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Contrairement au tableau *numpy*, cet index peut être autre chose qu'un entier:" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassSexAgeSibSpParchTicketFareCabinEmbarked
Name
Cumings, Mrs. John Bradley (Florence Briggs Thayer)211female38.010PC 1759971.2833C85C
Futrelle, Mrs. Jacques Heath (Lily May Peel)411female35.01011380353.1000C123S
McCarthy, Mr. Timothy J701male54.0001746351.8625E46S
Sandstrom, Miss. Marguerite Rut1113female4.011PP 954916.7000G6S
....................................
Carlsson, Mr. Frans Olof87301male33.0006955.0000B51 B53 B55S
Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)88011female56.0011176783.1583C50C
Graham, Miss. Margaret Edith88811female19.00011205330.0000B42S
Behr, Mr. Karl Howell89011male26.00011136930.0000C148C
\n", "

183 rows × 11 columns

\n", "
" ], "text/plain": [ " PassengerId Survived \\\n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... 2 1 \n", "Futrelle, Mrs. Jacques Heath (Lily May Peel) 4 1 \n", "McCarthy, Mr. Timothy J 7 0 \n", "Sandstrom, Miss. Marguerite Rut 11 1 \n", "... ... ... \n", "Carlsson, Mr. Frans Olof 873 0 \n", "Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) 880 1 \n", "Graham, Miss. Margaret Edith 888 1 \n", "Behr, Mr. Karl Howell 890 1 \n", "\n", " Pclass Sex Age \\\n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... 1 female 38.0 \n", "Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 female 35.0 \n", "McCarthy, Mr. Timothy J 1 male 54.0 \n", "Sandstrom, Miss. Marguerite Rut 3 female 4.0 \n", "... ... ... ... \n", "Carlsson, Mr. Frans Olof 1 male 33.0 \n", "Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) 1 female 56.0 \n", "Graham, Miss. Margaret Edith 1 female 19.0 \n", "Behr, Mr. Karl Howell 1 male 26.0 \n", "\n", " SibSp Parch Ticket \\\n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... 1 0 PC 17599 \n", "Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 0 113803 \n", "McCarthy, Mr. Timothy J 0 0 17463 \n", "Sandstrom, Miss. Marguerite Rut 1 1 PP 9549 \n", "... ... ... ... \n", "Carlsson, Mr. Frans Olof 0 0 695 \n", "Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) 0 1 11767 \n", "Graham, Miss. Margaret Edith 0 0 112053 \n", "Behr, Mr. Karl Howell 0 0 111369 \n", "\n", " Fare Cabin \\\n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... 71.2833 C85 \n", "Futrelle, Mrs. Jacques Heath (Lily May Peel) 53.1000 C123 \n", "McCarthy, Mr. Timothy J 51.8625 E46 \n", "Sandstrom, Miss. Marguerite Rut 16.7000 G6 \n", "... ... ... \n", "Carlsson, Mr. Frans Olof 5.0000 B51 B53 B55 \n", "Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) 83.1583 C50 \n", "Graham, Miss. Margaret Edith 30.0000 B42 \n", "Behr, Mr. Karl Howell 30.0000 C148 \n", "\n", " Embarked \n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... C \n", "Futrelle, Mrs. 
Jacques Heath (Lily May Peel) S \n", "McCarthy, Mr. Timothy J S \n", "Sandstrom, Miss. Marguerite Rut S \n", "... ... \n", "Carlsson, Mr. Frans Olof S \n", "Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) C \n", "Graham, Miss. Margaret Edith S \n", "Behr, Mr. Karl Howell C \n", "\n", "[183 rows x 11 columns]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic = df_titanic.set_index('Name')\n", "df_titanic" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "33.0" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "age = df_titanic['Age']\n", "age['Carlsson, Mr. Frans Olof']" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "35.6744262295082" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "age.mean()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassSexAgeSibSpParchTicketFareCabinEmbarked
Name
Becker, Master. Richard F18412male1.002123013639.00F4S
Allison, Master. Hudson Trevor30611male0.9212113781151.55C22 C26S
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass Sex Age \\\n", "Name \n", "Becker, Master. Richard F 184 1 2 male 1.00 \n", "Allison, Master. Hudson Trevor 306 1 1 male 0.92 \n", "\n", " SibSp Parch Ticket Fare Cabin Embarked \n", "Name \n", "Becker, Master. Richard F 2 1 230136 39.00 F4 S \n", "Allison, Master. Hudson Trevor 1 2 113781 151.55 C22 C26 S " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic[age <2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "S 644\n", "C 168\n", "Q 77\n", "Name: Embarked, dtype: int64" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic_raw['Embarked'].value_counts()" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassSexAgeSibSpParchTicketFareCabinEmbarked
Name
Cumings, Mrs. John Bradley (Florence Briggs Thayer)211female38.010PC 1759971.2833C85C
Harper, Mrs. Henry Sleeper (Myna Haxtun)5311female49.010PC 1757276.7292D33C
Ostby, Mr. Engelhart Cornelius5501male65.00111350961.9792B30C
Goldschmidt, Mr. George B9701male71.000PC 1775434.6542A5C
Greenfield, Mr. William Bertram9811male23.001PC 1775963.3583D10 D12C
Baxter, Mr. Quigg Edmond11901male24.001PC 17558247.5208B58 B60C
Giglio, Mr. Victor14001male24.000PC 1759379.2000B86C
Smith, Mr. James Clinch17501male56.0001776430.6958A7C
Isham, Miss. Ann Elizabeth17801female50.000PC 1759528.7125C49C
Brown, Mrs. James Joseph (Margaret Tobin)19511female44.000PC 1761027.7208B4C
Lurette, Miss. Elise19611female58.000PC 17569146.5208B80C
Blank, Mr. Henry21011male40.00011227731.0000A31C
Newell, Miss. Madeleine21611female31.01035273113.2750D36C
Bazzani, Miss. Albina21911female32.0001181376.2917D15C
Natsch, Mr. Charles H27401male37.001PC 1759629.7000C118C
Bishop, Mrs. Dickinson H (Helen Walton)29211female19.0101196791.0792B49C
Levy, Mr. Rene Jacques29302male36.000SC/Paris 216312.8750DC
Baxter, Mrs. James (Helene DeLaudeniere Chaput)30011female50.001PC 17558247.5208B58 B60C
Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)30811female17.010PC 17758108.9000C65C
Francatelli, Miss. Laura Mabel31011female30.000PC 1748556.9292E36C
Hays, Miss. Margaret Bechstein31111female24.0001176783.1583C54C
Ryerson, Miss. Emily Borie31211female18.022PC 17608262.3750B57 B59 B63 B66C
Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)32011female40.01116966134.5000E34C
Young, Miss. Marie Grice32611female36.000PC 17760135.6333C32C
Hippach, Miss. Jean Gertrude33011female16.00111136157.9792B18C
Burns, Miss. Elizabeth Margaret33811female41.00016966134.5000E40C
Warren, Mrs. Frank Manley (Anna Sophia Atkinson)36711female60.01011081375.2500D37C
Aubart, Mme. Leontine Pauline37011female24.000PC 1747769.3000B35C
Harder, Mr. George Achilles37111male25.0101176555.4417E50C
Widener, Mr. Harry Elkins37801male27.002113503211.5000C82C
Newell, Miss. Marjorie39411female23.01035273113.2750D36C
Foreman, Mr. Benjamin Laventall45301male30.00011305127.7500C111C
Goldenberg, Mr. Samuel L45411male49.0101745389.1042C92C
Jerwan, Mrs. Amin S (Marie Marthe Thuillard)47412female23.000SC/AH Basle 54113.7917DC
Bishop, Mr. Dickinson H48511male25.0101196791.0792B49C
Kent, Mr. Edward Austin48801male58.0001177129.7000B37C
Eustis, Miss. Elizabeth Mussey49711female54.0103694778.2667D20C
Penasco y Castellana, Mr. Victor de Satode50601male18.010PC 17758108.9000C65C
Hippach, Mrs. Louis Albert (Ida Sophia Fischer)52411female44.00111136157.9792B18C
Frolicher, Miss. Hedwig Margaritha54011female22.0021356849.5000B39C
Douglas, Mr. Walter Donald54501male50.010PC 17761106.4250C86C
Thayer, Mr. John Borland Jr55111male17.00217421110.8833C70C
Duff Gordon, Lady. (Lucille Christiana Sutherland) (\"Mrs Morgan\")55711female48.0101175539.6000A16C
Thayer, Mrs. John Borland (Marian Longstreth Morris)58211female39.01117421110.8833C68C
Ross, Mr. John Hugo58401male36.0001304940.1250A10C
Frolicher-Stehli, Mr. Maxmillian58811male60.0111356779.2000B41C
Stephenson, Mrs. Walter Bertram (Martha Eustis)59211female52.0103694778.2667D20C
Duff Gordon, Sir. Cosmo Edmund (\"Mr Morgan\")60011male49.010PC 1748556.9292A20C
Stahelin-Maeglin, Dr. Max63311male32.0001321430.5000B50C
Sagesser, Mlle. Emma64211female24.000PC 1747769.3000B35C
Harper, Mr. Henry Sleeper64611male48.010PC 1757276.7292D33C
Simonius-Blumer, Col. Oberst Alfons64811male56.0001321335.5000A26C
Newell, Mr. Arthur Webster66001male58.00235273113.2750D48C
Cardeza, Mr. Thomas Drake Martinez68011male36.001PC 17755512.3292B51 B53 B55C
Hassab, Mr. Hammad68211male27.000PC 1757276.7292D49C
Thayer, Mr. John Borland69901male49.01117421110.8833C68C
Astor, Mrs. John Jacob (Madeleine Talmadge Force)70111female18.010PC 17757227.5250C62 C64C
Mayne, Mlle. Berthe Antonine (\"Mrs de Villiers\")71111female24.000PC 1748249.5042C90C
Endres, Miss. Caroline Louise71711female38.000PC 17757227.5250C45C
Lesurer, Mr. Gustave J73811male35.000PC 17755512.3292B101C
Ryerson, Miss. Susan Parker \"Suzette\"74311female21.022PC 17608262.3750B57 B59 B63 B66C
Guggenheim, Mr. Benjamin79001male46.000PC 1759379.2000B82 B84C
Compton, Miss. Sara Rebecca83611female39.011PC 1775683.1583E49C
Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)88011female56.0011176783.1583C50C
Behr, Mr. Karl Howell89011male26.00011136930.0000C148C
\n", "
" ], "text/plain": [ " PassengerId Survived \\\n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... 2 1 \n", "Harper, Mrs. Henry Sleeper (Myna Haxtun) 53 1 \n", "Ostby, Mr. Engelhart Cornelius 55 0 \n", "Goldschmidt, Mr. George B 97 0 \n", "Greenfield, Mr. William Bertram 98 1 \n", "Baxter, Mr. Quigg Edmond 119 0 \n", "Giglio, Mr. Victor 140 0 \n", "Smith, Mr. James Clinch 175 0 \n", "Isham, Miss. Ann Elizabeth 178 0 \n", "Brown, Mrs. James Joseph (Margaret Tobin) 195 1 \n", "Lurette, Miss. Elise 196 1 \n", "Blank, Mr. Henry 210 1 \n", "Newell, Miss. Madeleine 216 1 \n", "Bazzani, Miss. Albina 219 1 \n", "Natsch, Mr. Charles H 274 0 \n", "Bishop, Mrs. Dickinson H (Helen Walton) 292 1 \n", "Levy, Mr. Rene Jacques 293 0 \n", "Baxter, Mrs. James (Helene DeLaudeniere Chaput) 300 1 \n", "Penasco y Castellana, Mrs. Victor de Satode (Ma... 308 1 \n", "Francatelli, Miss. Laura Mabel 310 1 \n", "Hays, Miss. Margaret Bechstein 311 1 \n", "Ryerson, Miss. Emily Borie 312 1 \n", "Spedden, Mrs. Frederic Oakley (Margaretta Corni... 320 1 \n", "Young, Miss. Marie Grice 326 1 \n", "Hippach, Miss. Jean Gertrude 330 1 \n", "Burns, Miss. Elizabeth Margaret 338 1 \n", "Warren, Mrs. Frank Manley (Anna Sophia Atkinson) 367 1 \n", "Aubart, Mme. Leontine Pauline 370 1 \n", "Harder, Mr. George Achilles 371 1 \n", "Widener, Mr. Harry Elkins 378 0 \n", "Newell, Miss. Marjorie 394 1 \n", "Foreman, Mr. Benjamin Laventall 453 0 \n", "Goldenberg, Mr. Samuel L 454 1 \n", "Jerwan, Mrs. Amin S (Marie Marthe Thuillard) 474 1 \n", "Bishop, Mr. Dickinson H 485 1 \n", "Kent, Mr. Edward Austin 488 0 \n", "Eustis, Miss. Elizabeth Mussey 497 1 \n", "Penasco y Castellana, Mr. Victor de Satode 506 0 \n", "Hippach, Mrs. Louis Albert (Ida Sophia Fischer) 524 1 \n", "Frolicher, Miss. Hedwig Margaritha 540 1 \n", "Douglas, Mr. Walter Donald 545 0 \n", "Thayer, Mr. John Borland Jr 551 1 \n", "Duff Gordon, Lady. (Lucille Christiana Sutherla... 557 1 \n", "Thayer, Mrs. 
John Borland (Marian Longstreth Mo... 582 1 \n", "Ross, Mr. John Hugo 584 0 \n", "Frolicher-Stehli, Mr. Maxmillian 588 1 \n", "Stephenson, Mrs. Walter Bertram (Martha Eustis) 592 1 \n", "Duff Gordon, Sir. Cosmo Edmund (\"Mr Morgan\") 600 1 \n", "Stahelin-Maeglin, Dr. Max 633 1 \n", "Sagesser, Mlle. Emma 642 1 \n", "Harper, Mr. Henry Sleeper 646 1 \n", "Simonius-Blumer, Col. Oberst Alfons 648 1 \n", "Newell, Mr. Arthur Webster 660 0 \n", "Cardeza, Mr. Thomas Drake Martinez 680 1 \n", "Hassab, Mr. Hammad 682 1 \n", "Thayer, Mr. John Borland 699 0 \n", "Astor, Mrs. John Jacob (Madeleine Talmadge Force) 701 1 \n", "Mayne, Mlle. Berthe Antonine (\"Mrs de Villiers\") 711 1 \n", "Endres, Miss. Caroline Louise 717 1 \n", "Lesurer, Mr. Gustave J 738 1 \n", "Ryerson, Miss. Susan Parker \"Suzette\" 743 1 \n", "Guggenheim, Mr. Benjamin 790 0 \n", "Compton, Miss. Sara Rebecca 836 1 \n", "Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) 880 1 \n", "Behr, Mr. Karl Howell 890 1 \n", "\n", " Pclass Sex Age \\\n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... 1 female 38.0 \n", "Harper, Mrs. Henry Sleeper (Myna Haxtun) 1 female 49.0 \n", "Ostby, Mr. Engelhart Cornelius 1 male 65.0 \n", "Goldschmidt, Mr. George B 1 male 71.0 \n", "Greenfield, Mr. William Bertram 1 male 23.0 \n", "Baxter, Mr. Quigg Edmond 1 male 24.0 \n", "Giglio, Mr. Victor 1 male 24.0 \n", "Smith, Mr. James Clinch 1 male 56.0 \n", "Isham, Miss. Ann Elizabeth 1 female 50.0 \n", "Brown, Mrs. James Joseph (Margaret Tobin) 1 female 44.0 \n", "Lurette, Miss. Elise 1 female 58.0 \n", "Blank, Mr. Henry 1 male 40.0 \n", "Newell, Miss. Madeleine 1 female 31.0 \n", "Bazzani, Miss. Albina 1 female 32.0 \n", "Natsch, Mr. Charles H 1 male 37.0 \n", "Bishop, Mrs. Dickinson H (Helen Walton) 1 female 19.0 \n", "Levy, Mr. Rene Jacques 2 male 36.0 \n", "Baxter, Mrs. James (Helene DeLaudeniere Chaput) 1 female 50.0 \n", "Penasco y Castellana, Mrs. Victor de Satode (Ma... 1 female 17.0 \n", "Francatelli, Miss. 
Laura Mabel 1 female 30.0 \n", "Hays, Miss. Margaret Bechstein 1 female 24.0 \n", "Ryerson, Miss. Emily Borie 1 female 18.0 \n", "Spedden, Mrs. Frederic Oakley (Margaretta Corni... 1 female 40.0 \n", "Young, Miss. Marie Grice 1 female 36.0 \n", "Hippach, Miss. Jean Gertrude 1 female 16.0 \n", "Burns, Miss. Elizabeth Margaret 1 female 41.0 \n", "Warren, Mrs. Frank Manley (Anna Sophia Atkinson) 1 female 60.0 \n", "Aubart, Mme. Leontine Pauline 1 female 24.0 \n", "Harder, Mr. George Achilles 1 male 25.0 \n", "Widener, Mr. Harry Elkins 1 male 27.0 \n", "Newell, Miss. Marjorie 1 female 23.0 \n", "Foreman, Mr. Benjamin Laventall 1 male 30.0 \n", "Goldenberg, Mr. Samuel L 1 male 49.0 \n", "Jerwan, Mrs. Amin S (Marie Marthe Thuillard) 2 female 23.0 \n", "Bishop, Mr. Dickinson H 1 male 25.0 \n", "Kent, Mr. Edward Austin 1 male 58.0 \n", "Eustis, Miss. Elizabeth Mussey 1 female 54.0 \n", "Penasco y Castellana, Mr. Victor de Satode 1 male 18.0 \n", "Hippach, Mrs. Louis Albert (Ida Sophia Fischer) 1 female 44.0 \n", "Frolicher, Miss. Hedwig Margaritha 1 female 22.0 \n", "Douglas, Mr. Walter Donald 1 male 50.0 \n", "Thayer, Mr. John Borland Jr 1 male 17.0 \n", "Duff Gordon, Lady. (Lucille Christiana Sutherla... 1 female 48.0 \n", "Thayer, Mrs. John Borland (Marian Longstreth Mo... 1 female 39.0 \n", "Ross, Mr. John Hugo 1 male 36.0 \n", "Frolicher-Stehli, Mr. Maxmillian 1 male 60.0 \n", "Stephenson, Mrs. Walter Bertram (Martha Eustis) 1 female 52.0 \n", "Duff Gordon, Sir. Cosmo Edmund (\"Mr Morgan\") 1 male 49.0 \n", "Stahelin-Maeglin, Dr. Max 1 male 32.0 \n", "Sagesser, Mlle. Emma 1 female 24.0 \n", "Harper, Mr. Henry Sleeper 1 male 48.0 \n", "Simonius-Blumer, Col. Oberst Alfons 1 male 56.0 \n", "Newell, Mr. Arthur Webster 1 male 58.0 \n", "Cardeza, Mr. Thomas Drake Martinez 1 male 36.0 \n", "Hassab, Mr. Hammad 1 male 27.0 \n", "Thayer, Mr. John Borland 1 male 49.0 \n", "Astor, Mrs. John Jacob (Madeleine Talmadge Force) 1 female 18.0 \n", "Mayne, Mlle. 
Berthe Antonine (\"Mrs de Villiers\") 1 female 24.0 \n", "Endres, Miss. Caroline Louise 1 female 38.0 \n", "Lesurer, Mr. Gustave J 1 male 35.0 \n", "Ryerson, Miss. Susan Parker \"Suzette\" 1 female 21.0 \n", "Guggenheim, Mr. Benjamin 1 male 46.0 \n", "Compton, Miss. Sara Rebecca 1 female 39.0 \n", "Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) 1 female 56.0 \n", "Behr, Mr. Karl Howell 1 male 26.0 \n", "\n", " SibSp Parch \\\n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... 1 0 \n", "Harper, Mrs. Henry Sleeper (Myna Haxtun) 1 0 \n", "Ostby, Mr. Engelhart Cornelius 0 1 \n", "Goldschmidt, Mr. George B 0 0 \n", "Greenfield, Mr. William Bertram 0 1 \n", "Baxter, Mr. Quigg Edmond 0 1 \n", "Giglio, Mr. Victor 0 0 \n", "Smith, Mr. James Clinch 0 0 \n", "Isham, Miss. Ann Elizabeth 0 0 \n", "Brown, Mrs. James Joseph (Margaret Tobin) 0 0 \n", "Lurette, Miss. Elise 0 0 \n", "Blank, Mr. Henry 0 0 \n", "Newell, Miss. Madeleine 1 0 \n", "Bazzani, Miss. Albina 0 0 \n", "Natsch, Mr. Charles H 0 1 \n", "Bishop, Mrs. Dickinson H (Helen Walton) 1 0 \n", "Levy, Mr. Rene Jacques 0 0 \n", "Baxter, Mrs. James (Helene DeLaudeniere Chaput) 0 1 \n", "Penasco y Castellana, Mrs. Victor de Satode (Ma... 1 0 \n", "Francatelli, Miss. Laura Mabel 0 0 \n", "Hays, Miss. Margaret Bechstein 0 0 \n", "Ryerson, Miss. Emily Borie 2 2 \n", "Spedden, Mrs. Frederic Oakley (Margaretta Corni... 1 1 \n", "Young, Miss. Marie Grice 0 0 \n", "Hippach, Miss. Jean Gertrude 0 1 \n", "Burns, Miss. Elizabeth Margaret 0 0 \n", "Warren, Mrs. Frank Manley (Anna Sophia Atkinson) 1 0 \n", "Aubart, Mme. Leontine Pauline 0 0 \n", "Harder, Mr. George Achilles 1 0 \n", "Widener, Mr. Harry Elkins 0 2 \n", "Newell, Miss. Marjorie 1 0 \n", "Foreman, Mr. Benjamin Laventall 0 0 \n", "Goldenberg, Mr. Samuel L 1 0 \n", "Jerwan, Mrs. Amin S (Marie Marthe Thuillard) 0 0 \n", "Bishop, Mr. Dickinson H 1 0 \n", "Kent, Mr. Edward Austin 0 0 \n", "Eustis, Miss. Elizabeth Mussey 1 0 \n", "Penasco y Castellana, Mr. 
Victor de Satode 1 0 \n", "Hippach, Mrs. Louis Albert (Ida Sophia Fischer) 0 1 \n", "Frolicher, Miss. Hedwig Margaritha 0 2 \n", "Douglas, Mr. Walter Donald 1 0 \n", "Thayer, Mr. John Borland Jr 0 2 \n", "Duff Gordon, Lady. (Lucille Christiana Sutherla... 1 0 \n", "Thayer, Mrs. John Borland (Marian Longstreth Mo... 1 1 \n", "Ross, Mr. John Hugo 0 0 \n", "Frolicher-Stehli, Mr. Maxmillian 1 1 \n", "Stephenson, Mrs. Walter Bertram (Martha Eustis) 1 0 \n", "Duff Gordon, Sir. Cosmo Edmund (\"Mr Morgan\") 1 0 \n", "Stahelin-Maeglin, Dr. Max 0 0 \n", "Sagesser, Mlle. Emma 0 0 \n", "Harper, Mr. Henry Sleeper 1 0 \n", "Simonius-Blumer, Col. Oberst Alfons 0 0 \n", "Newell, Mr. Arthur Webster 0 2 \n", "Cardeza, Mr. Thomas Drake Martinez 0 1 \n", "Hassab, Mr. Hammad 0 0 \n", "Thayer, Mr. John Borland 1 1 \n", "Astor, Mrs. John Jacob (Madeleine Talmadge Force) 1 0 \n", "Mayne, Mlle. Berthe Antonine (\"Mrs de Villiers\") 0 0 \n", "Endres, Miss. Caroline Louise 0 0 \n", "Lesurer, Mr. Gustave J 0 0 \n", "Ryerson, Miss. Susan Parker \"Suzette\" 2 2 \n", "Guggenheim, Mr. Benjamin 0 0 \n", "Compton, Miss. Sara Rebecca 1 1 \n", "Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) 0 1 \n", "Behr, Mr. Karl Howell 0 0 \n", "\n", " Ticket Fare \\\n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... PC 17599 71.2833 \n", "Harper, Mrs. Henry Sleeper (Myna Haxtun) PC 17572 76.7292 \n", "Ostby, Mr. Engelhart Cornelius 113509 61.9792 \n", "Goldschmidt, Mr. George B PC 17754 34.6542 \n", "Greenfield, Mr. William Bertram PC 17759 63.3583 \n", "Baxter, Mr. Quigg Edmond PC 17558 247.5208 \n", "Giglio, Mr. Victor PC 17593 79.2000 \n", "Smith, Mr. James Clinch 17764 30.6958 \n", "Isham, Miss. Ann Elizabeth PC 17595 28.7125 \n", "Brown, Mrs. James Joseph (Margaret Tobin) PC 17610 27.7208 \n", "Lurette, Miss. Elise PC 17569 146.5208 \n", "Blank, Mr. Henry 112277 31.0000 \n", "Newell, Miss. Madeleine 35273 113.2750 \n", "Bazzani, Miss. Albina 11813 76.2917 \n", "Natsch, Mr. 
Charles H PC 17596 29.7000 \n", "Bishop, Mrs. Dickinson H (Helen Walton) 11967 91.0792 \n", "Levy, Mr. Rene Jacques SC/Paris 2163 12.8750 \n", "Baxter, Mrs. James (Helene DeLaudeniere Chaput) PC 17558 247.5208 \n", "Penasco y Castellana, Mrs. Victor de Satode (Ma... PC 17758 108.9000 \n", "Francatelli, Miss. Laura Mabel PC 17485 56.9292 \n", "Hays, Miss. Margaret Bechstein 11767 83.1583 \n", "Ryerson, Miss. Emily Borie PC 17608 262.3750 \n", "Spedden, Mrs. Frederic Oakley (Margaretta Corni... 16966 134.5000 \n", "Young, Miss. Marie Grice PC 17760 135.6333 \n", "Hippach, Miss. Jean Gertrude 111361 57.9792 \n", "Burns, Miss. Elizabeth Margaret 16966 134.5000 \n", "Warren, Mrs. Frank Manley (Anna Sophia Atkinson) 110813 75.2500 \n", "Aubart, Mme. Leontine Pauline PC 17477 69.3000 \n", "Harder, Mr. George Achilles 11765 55.4417 \n", "Widener, Mr. Harry Elkins 113503 211.5000 \n", "Newell, Miss. Marjorie 35273 113.2750 \n", "Foreman, Mr. Benjamin Laventall 113051 27.7500 \n", "Goldenberg, Mr. Samuel L 17453 89.1042 \n", "Jerwan, Mrs. Amin S (Marie Marthe Thuillard) SC/AH Basle 541 13.7917 \n", "Bishop, Mr. Dickinson H 11967 91.0792 \n", "Kent, Mr. Edward Austin 11771 29.7000 \n", "Eustis, Miss. Elizabeth Mussey 36947 78.2667 \n", "Penasco y Castellana, Mr. Victor de Satode PC 17758 108.9000 \n", "Hippach, Mrs. Louis Albert (Ida Sophia Fischer) 111361 57.9792 \n", "Frolicher, Miss. Hedwig Margaritha 13568 49.5000 \n", "Douglas, Mr. Walter Donald PC 17761 106.4250 \n", "Thayer, Mr. John Borland Jr 17421 110.8833 \n", "Duff Gordon, Lady. (Lucille Christiana Sutherla... 11755 39.6000 \n", "Thayer, Mrs. John Borland (Marian Longstreth Mo... 17421 110.8833 \n", "Ross, Mr. John Hugo 13049 40.1250 \n", "Frolicher-Stehli, Mr. Maxmillian 13567 79.2000 \n", "Stephenson, Mrs. Walter Bertram (Martha Eustis) 36947 78.2667 \n", "Duff Gordon, Sir. Cosmo Edmund (\"Mr Morgan\") PC 17485 56.9292 \n", "Stahelin-Maeglin, Dr. Max 13214 30.5000 \n", "Sagesser, Mlle. 
Emma PC 17477 69.3000 \n", "Harper, Mr. Henry Sleeper PC 17572 76.7292 \n", "Simonius-Blumer, Col. Oberst Alfons 13213 35.5000 \n", "Newell, Mr. Arthur Webster 35273 113.2750 \n", "Cardeza, Mr. Thomas Drake Martinez PC 17755 512.3292 \n", "Hassab, Mr. Hammad PC 17572 76.7292 \n", "Thayer, Mr. John Borland 17421 110.8833 \n", "Astor, Mrs. John Jacob (Madeleine Talmadge Force) PC 17757 227.5250 \n", "Mayne, Mlle. Berthe Antonine (\"Mrs de Villiers\") PC 17482 49.5042 \n", "Endres, Miss. Caroline Louise PC 17757 227.5250 \n", "Lesurer, Mr. Gustave J PC 17755 512.3292 \n", "Ryerson, Miss. Susan Parker \"Suzette\" PC 17608 262.3750 \n", "Guggenheim, Mr. Benjamin PC 17593 79.2000 \n", "Compton, Miss. Sara Rebecca PC 17756 83.1583 \n", "Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) 11767 83.1583 \n", "Behr, Mr. Karl Howell 111369 30.0000 \n", "\n", " Cabin Embarked \n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... C85 C \n", "Harper, Mrs. Henry Sleeper (Myna Haxtun) D33 C \n", "Ostby, Mr. Engelhart Cornelius B30 C \n", "Goldschmidt, Mr. George B A5 C \n", "Greenfield, Mr. William Bertram D10 D12 C \n", "Baxter, Mr. Quigg Edmond B58 B60 C \n", "Giglio, Mr. Victor B86 C \n", "Smith, Mr. James Clinch A7 C \n", "Isham, Miss. Ann Elizabeth C49 C \n", "Brown, Mrs. James Joseph (Margaret Tobin) B4 C \n", "Lurette, Miss. Elise B80 C \n", "Blank, Mr. Henry A31 C \n", "Newell, Miss. Madeleine D36 C \n", "Bazzani, Miss. Albina D15 C \n", "Natsch, Mr. Charles H C118 C \n", "Bishop, Mrs. Dickinson H (Helen Walton) B49 C \n", "Levy, Mr. Rene Jacques D C \n", "Baxter, Mrs. James (Helene DeLaudeniere Chaput) B58 B60 C \n", "Penasco y Castellana, Mrs. Victor de Satode (Ma... C65 C \n", "Francatelli, Miss. Laura Mabel E36 C \n", "Hays, Miss. Margaret Bechstein C54 C \n", "Ryerson, Miss. Emily Borie B57 B59 B63 B66 C \n", "Spedden, Mrs. Frederic Oakley (Margaretta Corni... E34 C \n", "Young, Miss. Marie Grice C32 C \n", "Hippach, Miss. 
Jean Gertrude B18 C \n", "Burns, Miss. Elizabeth Margaret E40 C \n", "Warren, Mrs. Frank Manley (Anna Sophia Atkinson) D37 C \n", "Aubart, Mme. Leontine Pauline B35 C \n", "Harder, Mr. George Achilles E50 C \n", "Widener, Mr. Harry Elkins C82 C \n", "Newell, Miss. Marjorie D36 C \n", "Foreman, Mr. Benjamin Laventall C111 C \n", "Goldenberg, Mr. Samuel L C92 C \n", "Jerwan, Mrs. Amin S (Marie Marthe Thuillard) D C \n", "Bishop, Mr. Dickinson H B49 C \n", "Kent, Mr. Edward Austin B37 C \n", "Eustis, Miss. Elizabeth Mussey D20 C \n", "Penasco y Castellana, Mr. Victor de Satode C65 C \n", "Hippach, Mrs. Louis Albert (Ida Sophia Fischer) B18 C \n", "Frolicher, Miss. Hedwig Margaritha B39 C \n", "Douglas, Mr. Walter Donald C86 C \n", "Thayer, Mr. John Borland Jr C70 C \n", "Duff Gordon, Lady. (Lucille Christiana Sutherla... A16 C \n", "Thayer, Mrs. John Borland (Marian Longstreth Mo... C68 C \n", "Ross, Mr. John Hugo A10 C \n", "Frolicher-Stehli, Mr. Maxmillian B41 C \n", "Stephenson, Mrs. Walter Bertram (Martha Eustis) D20 C \n", "Duff Gordon, Sir. Cosmo Edmund (\"Mr Morgan\") A20 C \n", "Stahelin-Maeglin, Dr. Max B50 C \n", "Sagesser, Mlle. Emma B35 C \n", "Harper, Mr. Henry Sleeper D33 C \n", "Simonius-Blumer, Col. Oberst Alfons A26 C \n", "Newell, Mr. Arthur Webster D48 C \n", "Cardeza, Mr. Thomas Drake Martinez B51 B53 B55 C \n", "Hassab, Mr. Hammad D49 C \n", "Thayer, Mr. John Borland C68 C \n", "Astor, Mrs. John Jacob (Madeleine Talmadge Force) C62 C64 C \n", "Mayne, Mlle. Berthe Antonine (\"Mrs de Villiers\") C90 C \n", "Endres, Miss. Caroline Louise C45 C \n", "Lesurer, Mr. Gustave J B101 C \n", "Ryerson, Miss. Susan Parker \"Suzette\" B57 B59 B63 B66 C \n", "Guggenheim, Mr. Benjamin B82 B84 C \n", "Compton, Miss. Sara Rebecca E49 C \n", "Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) C50 C \n", "Behr, Mr. 
Karl Howell C148 C " ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.options.display.max_rows = 70\n", "df_titanic[df_titanic['Embarked']=='C'] # Les passagers montés à Cherbourg n'ont pas des noms gaulois..." ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "pd.options.display.max_rows = 8" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.3838383838383838" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic_raw['Survived'].sum() / df_titanic_raw['Survived'].count()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.13804713804713806" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic['Survived'].sum() / df_titanic_raw['Survived'].count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Quelle était la proportion de femmes sur le bateau?**" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Sex\n", "female 0.352413\n", "male 0.647587\n", "dtype: float64" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic_raw.groupby(['Sex']).size() / df_titanic_raw['Sex'].count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data import et export\n", "\n", "Pandas supporte nativement une large gamme de formats d'entrée / sortie:\n", "- CSV, text\n", "- SQL database\n", "- Excel\n", "- HDF5\n", "- json\n", "- html\n", "- pickle\n", "- sas, stata\n", "- ..." ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "# pd.read_csv?" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Exploration" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
88688702Montvila, Rev. Juozasmale27.00021153613.00NaNS
88788811Graham, Miss. Margaret Edithfemale19.00011205330.00B42S
88888903Johnston, Miss. Catherine Helen \"Carrie\"femaleNaN12W./C. 660723.45NaNS
88989011Behr, Mr. Karl Howellmale26.00011136930.00C148C
89089103Dooley, Mr. Patrickmale32.0003703767.75NaNQ
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass Name \\\n", "886 887 0 2 Montvila, Rev. Juozas \n", "887 888 1 1 Graham, Miss. Margaret Edith \n", "888 889 0 3 Johnston, Miss. Catherine Helen \"Carrie\" \n", "889 890 1 1 Behr, Mr. Karl Howell \n", "890 891 0 3 Dooley, Mr. Patrick \n", "\n", " Sex Age SibSp Parch Ticket Fare Cabin Embarked \n", "886 male 27.0 0 0 211536 13.00 NaN S \n", "887 female 19.0 0 0 112053 30.00 B42 S \n", "888 female NaN 1 2 W./C. 6607 23.45 NaN S \n", "889 male 26.0 0 0 111369 30.00 C148 C \n", "890 male 32.0 0 0 370376 7.75 NaN Q " ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic_raw.tail()" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 NaN S " ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic_raw.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### iloc" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassSexAgeSibSpParchTicket
Name
Cumings, Mrs. John Bradley (Florence Briggs Thayer)11female38.010PC 17599
Futrelle, Mrs. Jacques Heath (Lily May Peel)11female35.010113803
\n", "
" ], "text/plain": [ " Survived Pclass Sex \\\n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... 1 1 female \n", "Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 1 female \n", "\n", " Age SibSp Parch \\\n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... 38.0 1 0 \n", "Futrelle, Mrs. Jacques Heath (Lily May Peel) 35.0 1 0 \n", "\n", " Ticket \n", "Name \n", "Cumings, Mrs. John Bradley (Florence Briggs Tha... PC 17599 \n", "Futrelle, Mrs. Jacques Heath (Lily May Peel) 113803 " ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.iloc[0:2,1:8]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### loc" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "26.55" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.loc['Bonnell, Miss. Elizabeth', 'Fare']" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PassengerId 12\n", "Survived 1\n", "Pclass 1\n", "Sex female\n", " ... \n", "Ticket 113783\n", "Fare 26.55\n", "Cabin C103\n", "Embarked S\n", "Name: Bonnell, Miss. Elizabeth, Length: 11, dtype: object" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.loc['Bonnell, Miss. Elizabeth']" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "df_titanic.loc['Bonnell, Miss. Elizabeth','Survived']= 100" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PassengerId 12\n", "Survived 100\n", "Pclass 1\n", "Sex female\n", " ... \n", "Ticket 113783\n", "Fare 26.55\n", "Cabin C103\n", "Embarked S\n", "Name: Bonnell, Miss. 
Elizabeth, Length: 11, dtype: object" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.loc['Bonnell, Miss. Elizabeth']" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "df_titanic.loc['Bonnell, Miss. Elizabeth','Survived']= 1 # On remet la valeur comme avant " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### L'opération group-by:" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassAgeSibSpParchFare
Sex
female461.8181820.9318181.21590932.6761360.5340910.54545589.000900
male449.3894740.4315791.16842138.4517890.4000000.41052669.124343
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass Age SibSp Parch \\\n", "Sex \n", "female 461.818182 0.931818 1.215909 32.676136 0.534091 0.545455 \n", "male 449.389474 0.431579 1.168421 38.451789 0.400000 0.410526 \n", "\n", " Fare \n", "Sex \n", "female 89.000900 \n", "male 69.124343 " ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# numeric_only=True: average only the numeric columns. pandas >= 2.0 raises a\n", "# TypeError on the string columns (Ticket, Cabin, Embarked) instead of dropping them.\n", "df_titanic.groupby('Sex').mean(numeric_only=True)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Sex\n", "female 44.479818\n", "male 25.523893\n", "Name: Fare, dtype: float64" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Select the column before aggregating, so that only 'Fare' is averaged.\n", "df_titanic_raw.groupby('Sex')['Fare'].mean()" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "# Bin the ages into 10-year classes: (0, 10], (10, 20], ..., (70, 80].\n", "df_titanic['AgeClass'] = pd.cut(df_titanic['Age'], bins=np.arange(0, 90, 10))" ] } ], "metadata": { "celltoolbar": "Slideshow", "hide_input": false, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 2 }