{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Statistics w/ NumPy" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### np.mean()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 0, 0, 3, 1],\n", " [3, 6, 6, 2, 9],\n", " [4, 5, 3, 8, 0]])" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matrix_A = np.array([[1,0,0,3,1],[3,6,6,2,9],[4,5,3,8,0]])\n", "matrix_A" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array([1. , 5.2, 4. ])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.mean(matrix_A, axis = 1)\n", "\n", "# We can call the function over a given axis (e.g. for every row or column when it comes to 2-D arrays)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.4" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matrix_A.mean()\n", "\n", "# An equivalent method exists." 
] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "'numpy.ndarray' object has no attribute 'sqrt'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmatrix_A\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msqrt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'sqrt'" ] } ], "source": [ "# matrix_A.sqrt() \n", "\n", "# Results in an attribute error, since not every function has an equivalent method. " ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array([1, 5, 4], dtype=int64)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.mean(matrix_A, axis = 1, dtype = np.int64)\n", "\n", "## We can cast the values to a specific type as well. 
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Min & Max Values" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 0, 0, 3, 1],\n", " [3, 6, 6, 2, 9],\n", " [4, 5, 3, 8, 0]])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matrix_A = np.array([[1,0,0,3,1],[3,6,6,2,9],[4,5,3,8,0]])\n", "matrix_A" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.min(matrix_A)\n", "\n", "# Returns the minimum value. " ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.amin(matrix_A)\n", "\n", "# Equivalent function (literally just a different alias for np.min())" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([3, 5, 3, 2, 0])" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.minimum(matrix_A[1], matrix_A[2])\n", "\n", "# Elementwise minimum. Returns the lowest value out of a given set.\n", "# In this case, np.minimum() returns the lower value every position across the two arrays. (e.g. 
lowest value in 1st position, 2nd, etc)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 0, 0, 2, 0])" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.minimum.reduce(matrix_A)\n", "\n", "## Reducing with minimum() along the first axis gives the column-wise minimum, i.e. the same result as np.min(matrix_A, axis = 0)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 0, 0, 2, 0])" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.min(matrix_A, axis = 0)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.max(matrix_A)\n", "\n", "## Corresponding max, amax and maximum functions exist. " ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.amax(matrix_A)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([4, 6, 6, 8, 9])" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.maximum.reduce(matrix_A)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Statistical Order Functions " ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 0, 0, 3, 1],\n", " [3, 6, 6, 2, 9],\n", " [4, 5, 3, 8, 0]])" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matrix_A = np.array([[1,0,0,3,1],[3,6,6,2,9],[4,5,3,8,0]])\n", "matrix_A" ] }, { "cell_type": "code", "execution_count": 29, 
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "9" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.ptp(matrix_A)\n", "\n", "## Returns difference between max and min (peak-to-peak) over the flattened array. " ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([3, 6, 6, 6, 9])" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.ptp(matrix_A, axis = 0)\n", "\n", "## Returns difference between max and min (peak-to-peak) for every column. " ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([3, 7, 8])" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.ptp(matrix_A, axis = 1)\n", "\n", "## Returns difference between max and min (peak-to-peak) for every row. " ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 8, 9])" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.sort(matrix_A, axis = None)\n", "\n", "# A sorted version of the flattened matrix_A" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "numpy.float64" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(np.percentile(matrix_A, 70))\n", "# N-th Percentile = A value which is greater than n% of the dataset. 
" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9.0" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.percentile(matrix_A, 100)\n", "\n", "# 100-th percentile = max\n", "# 50-th percentile = median\n", "# 0-th percentile = min" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "5" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.quantile(matrix_A, 0.70, interpolation = \"nearest\")\n", "\n", "# Quantile -> Similar to percentile, but works with parts of the dataset, rather than percentages. \n", "# Hence, the N-th Quantile = 100*N-th Percentile of the same dataset. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Averages and Variances" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 0, 0, 3, 1],\n", " [3, 6, 6, 2, 9],\n", " [4, 5, 3, 8, 0]])" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matrix_A = np.array([[1,0,0,3,1],[3,6,6,2,9],[4,5,3,8,0]])\n", "matrix_A" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.0" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.median(matrix_A)\n", "\n", "# Returns the median for the flattened array. \n", "# Median -> The middle value of a sorted version of the dataset. 
" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 8, 9])" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.sort(matrix_A, axis = None)" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.4" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.mean(matrix_A)\n", "\n", "# The arithmetic average of the flattened array. " ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.4" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.average(matrix_A)\n", "\n", "# The average of the flattened array. " ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.75915734, 0.7662218 , 0.6291028 , 0.20336599, 0.66501486],\n", " [0.06559111, 0.71326309, 0.10812106, 0.87969046, 0.49405844],\n", " [0.82472673, 0.45652944, 0.07367232, 0.69628564, 0.36690736]])" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from numpy.random import Generator as gen\n", "from numpy.random import PCG64 as pcg\n", "\n", "array_RG = gen(pcg(365))\n", "\n", "array_weights = array_RG.random(size = (3,5))\n", "array_weights\n", "\n", "# Generating some random weights for each entry of matrix_A (for the sake of the example)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.2120290871899306" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.average(matrix_A, weights = array_weights)\n", "\n", "# The weighted average of the flattened array. 
" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "7.84" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.var(matrix_A)\n", "\n", "# The variance of the array. " ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2.8" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.std(matrix_A)\n", "\n", "# The standard deviation of the array. " ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "7.839999999999999" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "2.8**2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Correlation" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 0, 0, 3, 1],\n", " [3, 6, 6, 2, 9],\n", " [4, 5, 3, 8, 0]])" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matrix_A = np.array([[1,0,0,3,1],[3,6,6,2,9],[4,5,3,8,0]])\n", "matrix_A" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 1.5, -2. , 2. ],\n", " [-2. , 7.7, -7. ],\n", " [ 2. , -7. , 8.5]])" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.cov(matrix_A)\n", "\n", "# The covariance of every row (array) of matrix_A and every other row of the variable. " ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 1. , -0.58848989, 0.56011203],\n", " [-0.58848989, 1. , -0.8652532 ],\n", " [ 0.56011203, -0.8652532 , 1. 
]])" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.corrcoef(matrix_A)\n", "\n", "# The correlation coefficient of every row (array) of matrix_A and every other row of the variable. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Histograms" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 0, 0, 3, 1],\n", " [3, 6, 6, 2, 9],\n", " [4, 5, 3, 8, 0]])" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matrix_A = np.array([[1,0,0,3,1],[3,6,6,2,9],[4,5,3,8,0]])\n", "matrix_A" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 8, 9])" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.sort(matrix_A, axis = None)" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([3, 3, 2, 2], dtype=int64), array([1. , 2.5, 4. , 5.5, 7. ]))" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.histogram(matrix_A, bins = 4, range = (1,7))\n", "\n", "# Computes the bin edges and how many points fall in each bin. \n", "# The 1-st array contains the number of points. The 2-nd array contains the bin edges. 
" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAANkUlEQVR4nO3cX6icdX7H8fdnkyz7xy0p5IA2fzyWhrbrglUOMVYosrsF/9HceKGwCt4ExW21CMV64dI7C0VajRjCardScSkqEtbY7UItqxdak2z8E7NCsHZzaopZl01Mldq0316ccT2OZ87MyZmTJ+d33i8YMjPPb2a+Pp68mTznmUlVIUla/j7X9QCSpPEw6JLUCIMuSY0w6JLUCIMuSY1Y3dULr1u3riYnJ7t6eUlalvbt2/fzqpqYa1tnQZ+cnGTv3r1dvbwkLUtJ/n3QNg+5SFIjDLokNcKgS1IjDLokNcKgS1IjDLokNWJo0JN8Icm/JnklycEkfzHHmiS5P8nhJK8muWRpxpUkDTLKeej/DXy9qk4mWQO8kOTZqnpx1pqrgM29y6XAQ70/JUlnyNB36DXjZO/mmt6l/0vUtwGP9ta+CKxNct54R5UkzWekT4omWQXsA34LeLCqXupbsh44Muv2dO++o33Psx3YDrBp06bTHBkm73rmtB+7WG/fe01nr92Vrvb3StzX0mKM9EvRqvrfqvo9YAOwJcnX+pZkrofN8Ty7qmqqqqYmJub8KgJJ0mla0FkuVfVL4F+AK/s2TQMbZ93eALyzqMkkSQsyylkuE0nW9q5/Efgm8NO+ZbuBm3pnu2wFjlfVUSRJZ8wox9DPA/6udxz9c8A/VNUPktwCUFU7gT3A1cBh4APg5iWaV5I0wNCgV9WrwMVz3L9z1vUCbhvvaJKkhfCTopLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUiKFBT7IxyXNJDiU5mOT2OdZckeR4kgO9yz1LM64kaZDVI6w5BdxZVfuTfAXYl+RHVfVG37rnq+ra8Y8oSRrF0HfoVXW0qvb3rr8PHALWL/VgkqSFWdAx9CSTwMXAS3NsvizJK0meTXLhgMdvT7I3yd5jx44teFhJ0mAjBz3JOcCTwB1VdaJv837g/Kq6CHgAeHqu56iqXVU1VVVTExMTpzuzJGkOIwU9yRpmYv5YVT3Vv72qTlTVyd71PcCaJOvGOqkkaV6jnOUS4GHgUFXdN2DNub11JNnSe973xjmoJGl+o5zlcjlwI/BakgO9++4GNgFU1U7gOuDWJKeAD4Hrq6qWYF5J0gBDg15VLwAZsmYHsGNcQ0mSFs5PikpSIwy6JDXCoEtSIwy6JDXCoEtSIwy6JDXCoEtSIwy6JDXCoEtSIwy6JDXCoEtSIwy6JDXCoEtSIwy6JDXCoEtSIwy6JDXCoEtSIwy6JDXCoEtSIwy6JDXCoEtSIwy6JDXCoEtSIwy6JDXCoEtSIwy6JDViaNCTbEzyXJJDSQ4muX2ONUlyf5LDSV5NcsnSjCtJGmT1CGtOAXdW1f4kXwH2JflRVb0xa81VwObe5VLgod6fkqQzZOg79Ko6WlX7e9ffBw4B6/uWbQMerRkvAmuTnDf2aSVJA43yDv1XkkwCFwMv9W1aDxyZdXu6d9/RvsdvB7YDbNq0aWGTniUm73qmk9d9+95rOnldnVn+fGkxRv6laJJzgCeBO6rqRP/mOR5Sn7mjaldVTVXV1MTExMImlSTNa6SgJ1nDTMwfq6qn5lgyDWyc
dXsD8M7ix5MkjWqUs1wCPAwcqqr7BizbDdzUO9tlK3C8qo4OWCtJWgKjHEO/HLgReC3Jgd59dwObAKpqJ7AHuBo4DHwA3Dz+USVJ8xka9Kp6gbmPkc9eU8Bt4xpKkrRwflJUkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhoxNOhJHknybpLXB2y/IsnxJAd6l3vGP6YkaZjVI6z5HrADeHSeNc9X1bVjmUiSdFqGvkOvqh8DvzgDs0iSFmFcx9AvS/JKkmeTXDhoUZLtSfYm2Xvs2LExvbQkCcYT9P3A+VV1EfAA8PSghVW1q6qmqmpqYmJiDC8tSfrYooNeVSeq6mTv+h5gTZJ1i55MkrQgiw56knOTpHd9S+8531vs80qSFmboWS5JHgeuANYlmQa+A6wBqKqdwHXArUlOAR8C11dVLdnEkqQ5DQ16Vd0wZPsOZk5rlCR1yE+KSlIjDLokNcKgS1IjDLokNcKgS1IjDLokNcKgS1IjDLokNcKgS1IjDLokNcKgS1IjDLokNcKgS1IjDLokNcKgS1IjDLokNcKgS1IjDLokNcKgS1IjDLokNcKgS1IjDLokNcKgS1IjDLokNcKgS1IjDLokNWJo0JM8kuTdJK8P2J4k9yc5nOTVJJeMf0xJ0jCjvEP/HnDlPNuvAjb3LtuBhxY/liRpoYYGvap+DPxiniXbgEdrxovA2iTnjWtASdJoVo/hOdYDR2bdnu7dd7R/YZLtzLyLZ9OmTWN46ZVj8q5nuh7hjFuJ/80r0Ur8//z2vdcsyfOO45eimeO+mmthVe2qqqmqmpqYmBjDS0uSPjaOoE8DG2fd3gC8M4bnlSQtwDiCvhu4qXe2y1bgeFV95nCLJGlpDT2GnuRx4ApgXZJp4DvAGoCq2gnsAa4GDgMfADcv1bCSpMGGBr2qbhiyvYDbxjaRJOm0+ElRSWqEQZekRhh0SWqEQZekRhh0SWqEQZekRhh0SWqEQZekRhh0SWqEQZekRhh0SWqEQZekRhh0SWqEQZekRhh0SWqEQZekRhh0SWqEQZekRhh0SWqEQZekRhh0SWqEQZekRhh0SWqEQZekRhh0SWrESEFPcmWSN5McTnLXHNuvSHI8yYHe5Z7xjypJms/qYQuSrAIeBP4QmAZeTrK7qt7oW/p8VV27BDNKkkYwyjv0LcDhqnqrqj4Cvg9sW9qxJEkLNUrQ1wNHZt2e7t3X77IkryR5NsmFcz1Rku1J9ibZe+zYsdMYV5I0yChBzxz3Vd/t/cD5VXUR8ADw9FxPVFW7qmqqqqYmJiYWNqkkaV6jBH0a2Djr9gbgndkLqupEVZ3sXd8DrEmybmxTSpKGGiXoLwObk1yQ5PPA9cDu2QuSnJskvetbes/73riHlSQNNvQsl6o6leTbwA+BVcAjVXUwyS297TuB64Bbk5wCPgSur6r+wzKSpCU0NOjwq8Moe/ru2znr+g5gx3hHkyQthJ8UlaRGGHRJaoRBl6RGGHRJaoRBl6RGGHRJaoRBl6RGGHRJaoRBl6RGGHRJaoRBl6RGGHRJaoRBl6RGGHRJaoRBl6RGGHRJaoRBl6RGGHRJaoRBl6RGGHRJaoRBl6RGGHRJaoRBl6RGGHRJaoRBl6RGGHRJasRIQU9yZZI3kxxOctcc25Pk/t72V5NcMv5RJUnzGRr0JKuAB4GrgK8CNyT5at+yq4DNvct24KExzylJGmKUd+hbgMNV9VZVfQR8H9jWt2Yb8GjNeBFYm+S8Mc8qSZrH6hHWrAeOzLo9DVw6wpr1wNHZi5JsZ+YdPMDJJG8uaNpPrAN+fpqPbZH749PcH58YaV/kL8/AJGeHs+JnY5H7+/xBG0YJeua4r05jDVW1C9g1wmvOP1Cyt6qmFvs8rXB/fJr7
4xPui09rfX+McshlGtg46/YG4J3TWCNJWkKjBP1lYHOSC5J8Hrge2N23ZjdwU+9sl63A8ao62v9EkqSlM/SQS1WdSvJt4IfAKuCRqjqY5Jbe9p3AHuBq4DDwAXDz0o0MjOGwTWPcH5/m/viE++LTmt4fqfrMoW5J0jLkJ0UlqREGXZIaseyCPuxrCFaSJBuTPJfkUJKDSW7veqauJVmV5CdJftD1LF1LsjbJE0l+2vsZuazrmbqS5E97f0deT/J4ki90PdNSWFZBH/FrCFaSU8CdVfW7wFbgthW+PwBuBw51PcRZ4m+Af6yq3wEuYoXulyTrgT8Bpqrqa8yc3HF9t1MtjWUVdEb7GoIVo6qOVtX+3vX3mfkLu77bqbqTZANwDfDdrmfpWpJfA/4AeBigqj6qql92O1WnVgNfTLIa+BKNfk5muQV90FcMrHhJJoGLgZe6naRTfw38GfB/XQ9yFvhN4Bjwt71DUN9N8uWuh+pCVf0H8FfAz5j5OpLjVfVP3U61NJZb0Ef6ioGVJsk5wJPAHVV1out5upDkWuDdqtrX9SxnidXAJcBDVXUx8F/AivydU5JfZ+Zf8hcAvwF8Ocm3up1qaSy3oPsVA32SrGEm5o9V1VNdz9Ohy4E/SvI2M4fivp7k77sdqVPTwHRVffwvtieYCfxK9E3g36rqWFX9D/AU8Psdz7QkllvQR/kaghUjSZg5Rnqoqu7rep4uVdWfV9WGqppk5ufin6uqyXdho6iq/wSOJPnt3l3fAN7ocKQu/QzYmuRLvb8z36DRXxCP8m2LZ41BX0PQ8Vhduhy4EXgtyYHefXdX1Z4OZ9LZ44+Bx3pvft5i6b+S46xUVS8leQLYz8yZYT+h0a8A8KP/ktSI5XbIRZI0gEGXpEYYdElqhEGXpEYYdElqhEGXpEYYdElqxP8DmTNfCNpWifUAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "plt.hist(matrix_A.flat, bins = np.histogram(matrix_A)[1])\n", "plt.show()\n", "\n", "# NumPy has no plotting capabilities, so we're using matplotlib's help. \n", "# .flat -> The flattened version of the array" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 0, 0, 3, 1],\n", " [3, 6, 6, 2, 9],\n", " [4, 5, 3, 8, 0]])" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matrix_A" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([[0., 0., 2., 0.],\n", " [1., 0., 0., 1.],\n", " [0., 0., 0., 0.],\n", " [1., 0., 0., 0.]]),\n", " array([0. , 0.75, 1.5 , 2.25, 3. ]),\n", " array([2. , 3.75, 5.5 , 7.25, 9. ]))" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.histogram2d(matrix_A[0], matrix_A[1], bins = 4)\n", "\n", "# We pass two datasets for the 2-D histogram. " ] }, { "cell_type": "code", "execution_count": 78, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "(array([[[0., 0., 0., 0.],\n", " [0., 0., 0., 0.],\n", " [0., 1., 1., 0.],\n", " [0., 0., 0., 0.]],\n", " \n", " [[0., 0., 1., 0.],\n", " [0., 0., 0., 0.],\n", " [0., 0., 0., 0.],\n", " [1., 0., 0., 0.]],\n", " \n", " [[0., 0., 0., 0.],\n", " [0., 0., 0., 0.],\n", " [0., 0., 0., 0.],\n", " [0., 0., 0., 0.]],\n", " \n", " [[0., 0., 0., 1.],\n", " [0., 0., 0., 0.],\n", " [0., 0., 0., 0.],\n", " [0., 0., 0., 0.]]]),\n", " [array([0. , 0.75, 1.5 , 2.25, 3. ]),\n", " array([2. , 3.75, 5.5 , 7.25, 9. 
]),\n", " array([0., 2., 4., 6., 8.])])" ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.histogramdd(matrix_A.transpose(), bins = 4)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### NaN-Equivalents" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 0, 0, 3, 1],\n", " [3, 6, 6, 2, 9],\n", " [4, 5, 3, 8, 0]])" ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matrix_A = np.array([[1,0,0,3,1],[3,6,6,2,9],[4,5,3,8,0]])\n", "matrix_A" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.4" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.nanmean(matrix_A)" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.4" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.mean(matrix_A)\n", "\n", "# NAN functions work the same way as non-NAN functions for non-NAN datasets" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 1., 0., 0., 3., 1.],\n", " [ 3., 6., nan, 2., 9.],\n", " [ 4., 5., 3., 8., 0.]])" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matrix_B = np.array([[1,0,0,3,1],[3,6,np.nan,2,9],[4,5,3,8,0]])\n", "matrix_B" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.2142857142857144" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.nanmean(matrix_B)\n", "\n", "# NAN functions ignore \"nan\" values and compute the mean. 
" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "nan" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.mean(matrix_B)\n", "\n", "# Non-NAN functions return \"nan\" when missing values are present." ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4.1" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.nanquantile(matrix_B, 0.7)" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "7.882653061224489" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.nanvar(matrix_B)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 2 }