Practicing EDA

import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

# Plot styling applied to every seaborn figure drawn below.
sns.set_theme(font_scale=1.4, style="white")

# Read the bullet data set from GitHub and preview the first rows.
DATA_URL = "https://github.com/firasm/bits/raw/master/bullet_data.csv"
df = pd.read_csv(DATA_URL)
df.head()
x y bullet zone
0 0 0 0.0 OutsidePlane
1 0 1 0.0 OutsidePlane
2 0 2 0.0 OutsidePlane
3 0 3 0.0 OutsidePlane
4 0 4 0.0 OutsidePlane
# Summary statistics of the numeric columns, one row per column.
df.describe().transpose()
count mean std min 25% 50% 75% max
x 87500.0 124.500000 72.168619 0.0 62.0 124.5 187.0 249.0
y 87500.0 174.500000 101.036462 0.0 87.0 174.5 262.0 349.0
bullet 68526.0 0.008741 0.093086 0.0 0.0 0.0 0.0 1.0
# sorted() accepts any iterable, so the unique values can be passed directly.
zone_values = sorted(df['zone'].unique())
print(f"The possible values for zone are:\n {zone_values} ")
The possible values for zone are:
 ['A', 'B', 'C', 'D', 'E', 'OutsidePlane', 'Unknown'] 
# Column labels as a plain Python list.
column_names = df.columns.tolist()
print(f"The possible columns are: {column_names}")
The possible columns are: ['x', 'y', 'bullet', 'zone']
# Distinct values in the bullet column (NaN is reported as its own value).
bullet_values = df['bullet'].unique()
print(f"The possible values for bullets are: \n {bullet_values}")
The possible values for bullets are: 
 [ 0. nan  1.]
# Number of rows whose bullet flag equals 1.
len(df.loc[df["bullet"].eq(1)])
599
# Boolean mask: True where the bullet flag equals 1 (NaN compares as False).
df["bullet"].eq(1)
0        False
1        False
2        False
3        False
4        False
         ...  
87495    False
87496    False
87497    False
87498    False
87499    False
Name: bullet, Length: 87500, dtype: bool
# Keep only the rows flagged as bullet hits and preview them.
is_hit = df["bullet"].eq(1)
hits_df = df.loc[is_hit]
hits_df.head()
x y bullet zone
24303 69 153 1.0 B
24308 69 158 1.0 B
24341 69 191 1.0 B
24629 70 129 1.0 B
24636 70 136 1.0 B
# Zones that contain at least one hit, in order of first appearance.
pd.unique(hits_df["zone"])
array(['B', 'C', 'A', 'Unknown', 'D', 'E'], dtype=object)
# Non-null count of every remaining column within each zone.
hits_df.groupby(by="zone").count()
x y bullet
zone
A 83 83 83
B 259 259 259
C 83 83 83
D 47 47 47
E 111 111 111
Unknown 16 16 16
# One bar per zone; bar height is the number of rows in hits_df for that zone.
sns.countplot(x="zone", data=hits_df)
sns.despine()
# Optional labelling, currently disabled. NOTE: the x-axis shows zones, so the
# "Bullet hits" text would describe the y-axis rather than the x-axis.
#plt.title("Bullet hit count by Airplane Zone")
#plt.xlabel("Bullet hits")
../../../_images/38b1e41a6cc31a597fabc75fc0e0dc4514342bccf0cc37ba1b0c7b1ea18491f5.png
# Flag every cell: 0 when the zone is "OutsidePlane", 1 for any other zone.
is_outside = df["zone"].eq("OutsidePlane")
df["outline"] = np.where(is_outside, 0, 1)
df
x y bullet zone outline
0 0 0 0.0 OutsidePlane 0
1 0 1 0.0 OutsidePlane 0
2 0 2 0.0 OutsidePlane 0
3 0 3 0.0 OutsidePlane 0
4 0 4 0.0 OutsidePlane 0
... ... ... ... ... ...
87495 249 345 0.0 OutsidePlane 0
87496 249 346 0.0 OutsidePlane 0
87497 249 347 0.0 OutsidePlane 0
87498 249 348 0.0 OutsidePlane 0
87499 249 349 0.0 OutsidePlane 0

87500 rows × 5 columns

# Reshape the long (x, y, outline) table into a 2-D grid: one row per x,
# one column per y, cell value = outline flag.
# The arguments of DataFrame.pivot are keyword-only on pandas >= 2.0; the
# positional form df.pivot("x", "y", "outline") fails there with
# "TypeError: DataFrame.pivot() takes 1 positional argument but 4 were given".
df.pivot(index="x", columns="y", values="outline")
# Pivot to an x-by-y grid of the outline flag (0 for "OutsidePlane", 1 for
# any other zone) and draw it as a heatmap. Keyword arguments are required by
# DataFrame.pivot on pandas >= 2.0; the positional form raises TypeError.
outline_grid = df.pivot(index="x", columns="y", values="outline")
ax = sns.heatmap(data=outline_grid)
# Drop the axis labels and hide the axes so only the grid itself is shown.
ax.set(xlabel=None, ylabel=None)
plt.axis('off')
(0.0, 350.0, 250.0, 0.0)
../../../_images/4873dfb611e1fe9088e4cd99600ee12b4f8ae7cd2e1922092e5356466af14d24.png
# Pivot to an x-by-y grid of the bullet flag and draw it as a heatmap.
# Keyword arguments are required by DataFrame.pivot on pandas >= 2.0; the
# positional form raises TypeError. The bullet column contains NaN for some
# cells, and seaborn leaves cells with missing values unshaded.
bullet_grid = df.pivot(index="x", columns="y", values="bullet")
ax = sns.heatmap(data=bullet_grid, cmap="Spectral")
# Drop the axis labels and hide the axes so only the grid itself is shown.
ax.set(xlabel=None, ylabel=None)
plt.axis('off')
(0.0, 350.0, 250.0, 0.0)
../../../_images/22997baf0ef4094aeae6763dada845e8ae4da5cab295f0d459044f190fda73b2.png