|
2057 | 2057 | },
|
2058 | 2058 | {
|
2059 | 2059 | "cell_type": "markdown",
|
2060 |
| - "id": "1dd8c102-fe33-4376-98f8-0b8c9c5b8384", |
| 2060 | + "id": "ef8f5c35-57e4-4347-845e-39fa4de28075", |
2061 | 2061 | "metadata": {
|
2062 | 2062 | "slideshow": {
|
2063 | 2063 | "slide_type": "subslide"
|
|
2071 | 2071 | {
|
2072 | 2072 | "cell_type": "code",
|
2073 | 2073 | "execution_count": 25,
|
2074 |
| - "id": "3d666cd2-eca0-4634-86a5-aebbcda99522", |
| 2074 | + "id": "b998653e-02dd-4540-ba66-dbc4b458b558", |
2075 | 2075 | "metadata": {},
|
2076 | 2076 | "outputs": [
|
2077 | 2077 | {
|
2078 | 2078 | "data": {
|
2079 | 2079 | "text/plain": [
|
2080 |
| - "32.6" |
| 2080 | + "13278.078548601512" |
2081 | 2081 | ]
|
2082 | 2082 | },
|
2083 | 2083 | "execution_count": 25,
|
|
2086 | 2086 | }
|
2087 | 2087 | ],
|
2088 | 2088 | "source": [
|
2089 |
| - "meteorites['mass (g)'].median()" |
| 2089 | + "meteorites['mass (g)'].mean()" |
2090 | 2090 | ]
|
2091 | 2091 | },
|
2092 | 2092 | {
|
2093 | 2093 | "cell_type": "markdown",
|
2094 |
| - "id": "f322c0f3-a057-4193-9f7f-78c9828d6197", |
| 2094 | + "id": "a398ecbe-10cc-4498-a7f1-91ea0bc736d2", |
2095 | 2095 | "metadata": {
|
2096 | 2096 | "slideshow": {
|
2097 | 2097 | "slide_type": "fragment"
|
2098 | 2098 | },
|
2099 | 2099 | "tags": []
|
2100 | 2100 | },
|
2101 | 2101 | "source": [
|
2102 |
| - "We can take this a step further and look at quantiles:" |
| 2102 | + "**Important**: The mean isn't always the best measure of central tendency. If there are outliers in the distribution, the mean will be pulled toward them. Here, the mean is being pulled higher by some very heavy meteorites – the distribution is [right-skewed](https://www.analyticsvidhya.com/blog/2020/07/what-is-skewness-statistics/)." |
| 2103 | + ] |
| 2104 | + }, |
| 2105 | + { |
| 2106 | + "cell_type": "markdown", |
| 2107 | + "id": "7b0162c6-f48f-4687-9902-72325ebecc0d", |
| 2108 | + "metadata": { |
| 2109 | + "slideshow": { |
| 2110 | + "slide_type": "subslide" |
| 2111 | + }, |
| 2112 | + "tags": [] |
| 2113 | + }, |
| 2114 | + "source": [ |
| 2115 | + "Taking a look at some quantiles at the extremes of the distribution shows that the mean is between the 95th and 99th percentiles of the distribution, so it isn't a good measure of central tendency here:" |
2103 | 2116 | ]
|
2104 | 2117 | },
|
2105 | 2118 | {
|
2106 | 2119 | "cell_type": "code",
|
2107 | 2120 | "execution_count": 26,
|
2108 |
| - "id": "5d97fd11-12eb-4970-b042-6cbbd35a3a23", |
| 2121 | + "id": "b7379492-da17-4358-b357-2ae6e1a26e67", |
2109 | 2122 | "metadata": {},
|
2110 | 2123 | "outputs": [
|
2111 | 2124 | {
|
2112 | 2125 | "data": {
|
2113 | 2126 | "text/plain": [
|
2114 | 2127 | "0.01 0.44\n",
|
2115 | 2128 | "0.05 1.10\n",
|
| 2129 | + "0.50 32.60\n", |
2116 | 2130 | "0.95 4000.00\n",
|
2117 | 2131 | "0.99 50600.00\n",
|
2118 | 2132 | "Name: mass (g), dtype: float64"
|
|
2124 | 2138 | }
|
2125 | 2139 | ],
|
2126 | 2140 | "source": [
|
2127 |
| - "meteorites['mass (g)'].quantile([0.01, 0.05, 0.95, 0.99])" |
| 2141 | + "meteorites['mass (g)'].quantile([0.01, 0.05, 0.5, 0.95, 0.99])" |
| 2142 | + ] |
| 2143 | + }, |
| 2144 | + { |
| 2145 | + "cell_type": "markdown", |
| 2146 | + "id": "2ca1c739-cf2b-4000-bedb-b66a3d11f071", |
| 2147 | + "metadata": { |
| 2148 | + "slideshow": { |
| 2149 | + "slide_type": "fragment" |
| 2150 | + }, |
| 2151 | + "tags": [] |
| 2152 | + }, |
| 2153 | + "source": [ |
| 2154 | + "A better measure in this case is the median (50th percentile), since it is robust to outliers:" |
| 2155 | + ] |
| 2156 | + }, |
| 2157 | + { |
| 2158 | + "cell_type": "code", |
| 2159 | + "execution_count": 27, |
| 2160 | + "id": "bc2e62f3-899d-4a50-a2f4-8b2e73e1bc2f", |
| 2161 | + "metadata": {}, |
| 2162 | + "outputs": [ |
| 2163 | + { |
| 2164 | + "data": { |
| 2165 | + "text/plain": [ |
| 2166 | + "32.6" |
| 2167 | + ] |
| 2168 | + }, |
| 2169 | + "execution_count": 27, |
| 2170 | + "metadata": {}, |
| 2171 | + "output_type": "execute_result" |
| 2172 | + } |
| 2173 | + ], |
| 2174 | + "source": [ |
| 2175 | + "meteorites['mass (g)'].median()" |
2128 | 2176 | ]
|
2129 | 2177 | },
|
2130 | 2178 | {
|
|
2142 | 2190 | },
|
2143 | 2191 | {
|
2144 | 2192 | "cell_type": "code",
|
2145 |
| - "execution_count": 27, |
| 2193 | + "execution_count": 28, |
2146 | 2194 | "id": "585af605-e601-49b6-bd1f-4838ab993302",
|
2147 | 2195 | "metadata": {},
|
2148 | 2196 | "outputs": [
|
|
2152 | 2200 | "60000000.0"
|
2153 | 2201 | ]
|
2154 | 2202 | },
|
2155 |
| - "execution_count": 27, |
| 2203 | + "execution_count": 28, |
2156 | 2204 | "metadata": {},
|
2157 | 2205 | "output_type": "execute_result"
|
2158 | 2206 | }
|
|
2176 | 2224 | },
|
2177 | 2225 | {
|
2178 | 2226 | "cell_type": "code",
|
2179 |
| - "execution_count": 28, |
| 2227 | + "execution_count": 29, |
2180 | 2228 | "id": "29720ccc-3855-42f7-a0d0-e41a83cf1bef",
|
2181 | 2229 | "metadata": {},
|
2182 | 2230 | "outputs": [
|
|
2196 | 2244 | "Name: 16392, dtype: object"
|
2197 | 2245 | ]
|
2198 | 2246 | },
|
2199 |
| - "execution_count": 28, |
| 2247 | + "execution_count": 29, |
2200 | 2248 | "metadata": {},
|
2201 | 2249 | "output_type": "execute_result"
|
2202 | 2250 | }
|
|
2220 | 2268 | },
|
2221 | 2269 | {
|
2222 | 2270 | "cell_type": "code",
|
2223 |
| - "execution_count": 29, |
| 2271 | + "execution_count": 30, |
2224 | 2272 | "id": "79c2a1db-0eeb-4173-964a-a38741c059ba",
|
2225 | 2273 | "metadata": {},
|
2226 | 2274 | "outputs": [
|
|
2230 | 2278 | "466"
|
2231 | 2279 | ]
|
2232 | 2280 | },
|
2233 |
| - "execution_count": 29, |
| 2281 | + "execution_count": 30, |
2234 | 2282 | "metadata": {},
|
2235 | 2283 | "output_type": "execute_result"
|
2236 | 2284 | }
|
|
2254 | 2302 | },
|
2255 | 2303 | {
|
2256 | 2304 | "cell_type": "code",
|
2257 |
| - "execution_count": 30, |
| 2305 | + "execution_count": 31, |
2258 | 2306 | "id": "3ac57de5-7734-478a-9772-feb82890d5ef",
|
2259 | 2307 | "metadata": {},
|
2260 | 2308 | "outputs": [
|
|
2266 | 2314 | " dtype=object)"
|
2267 | 2315 | ]
|
2268 | 2316 | },
|
2269 |
| - "execution_count": 30, |
| 2317 | + "execution_count": 31, |
2270 | 2318 | "metadata": {},
|
2271 | 2319 | "output_type": "execute_result"
|
2272 | 2320 | }
|
|
2299 | 2347 | },
|
2300 | 2348 | {
|
2301 | 2349 | "cell_type": "code",
|
2302 |
| - "execution_count": 31, |
| 2350 | + "execution_count": 32, |
2303 | 2351 | "id": "f0297d45-1d86-411f-ad8e-74cfaa3b2389",
|
2304 | 2352 | "metadata": {},
|
2305 | 2353 | "outputs": [
|
|
2512 | 2560 | "max NaN 81.166670 354.473330 NaN "
|
2513 | 2561 | ]
|
2514 | 2562 | },
|
2515 |
| - "execution_count": 31, |
| 2563 | + "execution_count": 32, |
2516 | 2564 | "metadata": {},
|
2517 | 2565 | "output_type": "execute_result"
|
2518 | 2566 | }
|
|
2557 | 2605 | },
|
2558 | 2606 | {
|
2559 | 2607 | "cell_type": "code",
|
2560 |
| - "execution_count": 32, |
| 2608 | + "execution_count": 33, |
2561 | 2609 | "id": "876cafcb-00ab-4f5a-8b3c-bfead4f0b14c",
|
2562 | 2610 | "metadata": {},
|
2563 | 2611 | "outputs": [],
|
|
2578 | 2626 | },
|
2579 | 2627 | {
|
2580 | 2628 | "cell_type": "code",
|
2581 |
| - "execution_count": 33, |
| 2629 | + "execution_count": 34, |
2582 | 2630 | "id": "6402bb24-3da9-48e5-bde1-0b8a9576f00d",
|
2583 | 2631 | "metadata": {},
|
2584 | 2632 | "outputs": [],
|
|
0 commit comments