In [1]:
import pandas as pd
# Load the abalone dataset from the CSV file in the working directory,
# then show the first five rows as a quick sanity check of the columns.
abalone_df = pd.read_csv("abalone.csv")
abalone_df.head(n=5)
Out[1]:
Sex Length Diam Height Whole Shucked Viscera Shell Rings
0 M 0.455 0.365 0.095 0.5140 0.2245 0.1010 0.150 15
1 M 0.350 0.265 0.090 0.2255 0.0995 0.0485 0.070 7
2 F 0.530 0.420 0.135 0.6770 0.2565 0.1415 0.210 9
3 M 0.440 0.365 0.125 0.5160 0.2155 0.1140 0.155 10
4 I 0.330 0.255 0.080 0.2050 0.0895 0.0395 0.055 7
In [2]:
# Summary statistics per column: count, mean, standard deviation, minimum,
# quartiles (25% / 50% / 75%) and maximum.
abalone_df.describe(percentiles=[0.25, 0.50, 0.75])
Out[2]:
Length Diam Height Whole Shucked Viscera Shell Rings
count 4177.000000 4177.000000 4177.000000 4177.000000 4177.000000 4177.000000 4177.000000 4177.000000
mean 0.523992 0.407881 0.139516 0.828742 0.359367 0.180594 0.238831 9.933684
std 0.120093 0.099240 0.041827 0.490389 0.221963 0.109614 0.139203 3.224169
min 0.075000 0.055000 0.000000 0.002000 0.001000 0.000500 0.001500 1.000000
25% 0.450000 0.350000 0.115000 0.441500 0.186000 0.093500 0.130000 8.000000
50% 0.545000 0.425000 0.140000 0.799500 0.336000 0.171000 0.234000 9.000000
75% 0.615000 0.480000 0.165000 1.153000 0.502000 0.253000 0.329000 11.000000
max 0.815000 0.650000 1.130000 2.825500 1.488000 0.760000 1.005000 29.000000
In [3]:
# Number of observations for each value of the 'Sex' column.
# pandas >= 2.0 names the value_counts() result "count" and moves the source
# column's name onto the index. Pinning the Series name to 'Sex' and clearing
# the index name keeps count_df's single column called 'Sex' on every pandas
# version, which is the column name the bar-chart cell selects with y=['Sex'].
count = abalone_df['Sex'].value_counts().rename('Sex').rename_axis(None)
count_df = count.to_frame()
print(count_df)
    Sex
M  1528
I  1342
F  1307
In [5]:
import matplotlib.pyplot as plt
%matplotlib inline
# Bar chart of the values in count_df's 'Sex' column, one bar per index label.
count_df.plot(kind='bar', y=['Sex'])
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x22c3b899ac8>
In [6]:
#散布図行列
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Apply seaborn's default styling before drawing.
sns.set()
# Scatter-plot matrix of the unfiltered data.
sns.pairplot(data=abalone_df)
Out[6]:
<seaborn.axisgrid.PairGrid at 0x22c3b336288>
In [7]:
# Pairwise correlation coefficients between the numeric columns.
# abalone_df also holds the string-valued 'Sex' column; DataFrame.corr() raises
# on non-numeric data in pandas >= 2.0 (older versions silently skipped it),
# so the numeric columns are selected explicitly before correlating.
corr = abalone_df.select_dtypes(include='number').corr()
corr
Out[7]:
Length Diam Height Whole Shucked Viscera Shell Rings
Length 1.000000 0.986812 0.827554 0.925261 0.897914 0.903018 0.897706 0.556720
Diam 0.986812 1.000000 0.833684 0.925452 0.893162 0.899724 0.905330 0.574660
Height 0.827554 0.833684 1.000000 0.819221 0.774972 0.798319 0.817338 0.557467
Whole 0.925261 0.925452 0.819221 1.000000 0.969405 0.966375 0.955355 0.540390
Shucked 0.897914 0.893162 0.774972 0.969405 1.000000 0.931961 0.882617 0.420884
Viscera 0.903018 0.899724 0.798319 0.966375 0.931961 1.000000 0.907656 0.503819
Shell 0.897706 0.905330 0.817338 0.955355 0.882617 0.907656 1.000000 0.627574
Rings 0.556720 0.574660 0.557467 0.540390 0.420884 0.503819 0.627574 1.000000
In [8]:
# Exclude the observations whose Height is 0.4 or more (keep Height < 0.4).
height_below_cutoff = abalone_df['Height'] < 0.4
abalone2_df = abalone_df.loc[height_below_cutoff]
# Report how many rows remain after the filter, then preview them.
print(abalone2_df.shape[0])
abalone2_df.head()
4175
Out[8]:
Sex Length Diam Height Whole Shucked Viscera Shell Rings
0 M 0.455 0.365 0.095 0.5140 0.2245 0.1010 0.150 15
1 M 0.350 0.265 0.090 0.2255 0.0995 0.0485 0.070 7
2 F 0.530 0.420 0.135 0.6770 0.2565 0.1415 0.210 9
3 M 0.440 0.365 0.125 0.5160 0.2155 0.1140 0.155 10
4 I 0.330 0.255 0.080 0.2050 0.0895 0.0395 0.055 7
In [9]:
# Scatter-plot matrix of the data after the Height filter.
sns.pairplot(data=abalone2_df)
Out[9]:
<seaborn.axisgrid.PairGrid at 0x22c4133e248>
In [ ]: