In [1]:
# Load the abalone measurements from the working directory.
# read.csv() already returns a data.frame, so no extra data.frame() wrap is
# needed. stringsAsFactors = TRUE keeps Sex a factor on R >= 4.0 as well,
# where read.csv() would otherwise return it as character and summary()
# would no longer report the per-level counts.
Abalone <- read.csv("abalone.csv", stringsAsFactors = TRUE)
# Number of rows and columns.
dim(Abalone)
  1. 4177
  2. 9
In [2]:
# Preview the first rows of the data set.
head(Abalone)
Sex Length Diam  Height Whole  Shucked Viscera Shell Rings
M   0.455  0.365 0.095  0.5140 0.2245  0.1010  0.150 15
M   0.350  0.265 0.090  0.2255 0.0995  0.0485  0.070 7
F   0.530  0.420 0.135  0.6770 0.2565  0.1415  0.210 9
M   0.440  0.365 0.125  0.5160 0.2155  0.1140  0.155 10
I   0.330  0.255 0.080  0.2050 0.0895  0.0395  0.055 7
I   0.425  0.300 0.095  0.3515 0.1410  0.0775  0.120 8
In [3]:
# Show the structure of the data frame: column types and leading values.
str(Abalone)
'data.frame':	4177 obs. of  9 variables:
 $ Sex    : Factor w/ 3 levels "F","I","M": 3 3 1 3 2 2 1 1 3 1 ...
 $ Length : num  0.455 0.35 0.53 0.44 0.33 0.425 0.53 0.545 0.475 0.55 ...
 $ Diam   : num  0.365 0.265 0.42 0.365 0.255 0.3 0.415 0.425 0.37 0.44 ...
 $ Height : num  0.095 0.09 0.135 0.125 0.08 0.095 0.15 0.125 0.125 0.15 ...
 $ Whole  : num  0.514 0.226 0.677 0.516 0.205 ...
 $ Shucked: num  0.2245 0.0995 0.2565 0.2155 0.0895 ...
 $ Viscera: num  0.101 0.0485 0.1415 0.114 0.0395 ...
 $ Shell  : num  0.15 0.07 0.21 0.155 0.055 0.12 0.33 0.26 0.165 0.32 ...
 $ Rings  : int  15 7 9 10 7 8 20 16 9 19 ...
In [4]:
# Per-column summary statistics (level counts for factor columns).
summary(Abalone)
 Sex          Length           Diam            Height           Whole       
 F:1307   Min.   :0.075   Min.   :0.0550   Min.   :0.0000   Min.   :0.0020  
 I:1342   1st Qu.:0.450   1st Qu.:0.3500   1st Qu.:0.1150   1st Qu.:0.4415  
 M:1528   Median :0.545   Median :0.4250   Median :0.1400   Median :0.7995  
          Mean   :0.524   Mean   :0.4079   Mean   :0.1395   Mean   :0.8287  
          3rd Qu.:0.615   3rd Qu.:0.4800   3rd Qu.:0.1650   3rd Qu.:1.1530  
          Max.   :0.815   Max.   :0.6500   Max.   :1.1300   Max.   :2.8255  
    Shucked          Viscera           Shell            Rings       
 Min.   :0.0010   Min.   :0.0005   Min.   :0.0015   Min.   : 1.000  
 1st Qu.:0.1860   1st Qu.:0.0935   1st Qu.:0.1300   1st Qu.: 8.000  
 Median :0.3360   Median :0.1710   Median :0.2340   Median : 9.000  
 Mean   :0.3594   Mean   :0.1806   Mean   :0.2388   Mean   : 9.934  
 3rd Qu.:0.5020   3rd Qu.:0.2530   3rd Qu.:0.3290   3rd Qu.:11.000  
 Max.   :1.4880   Max.   :0.7600   Max.   :1.0050   Max.   :29.000  
In [5]:
library(ggplot2)
library(gridExtra)

# Build a count histogram for one column of Abalone.
#   column: name of the column, as a string; also used as the x-axis label.
# Replaces the deprecated qplot() shortcut. bins = 30 spells out the
# stat_bin() default so ggplot2 no longer prints its
# "Pick better value with `binwidth`" message for every panel.
count_histogram <- function(column) {
  ggplot(data.frame(value = Abalone[[column]]), aes(x = value)) +
    geom_histogram(bins = 30) +
    labs(x = column, y = "count")
}

p1 <- count_histogram("Length")
p2 <- count_histogram("Diam")
p3 <- count_histogram("Height")
p4 <- count_histogram("Whole")
p5 <- count_histogram("Shucked")
p6 <- count_histogram("Viscera")
p7 <- count_histogram("Shell")
p8 <- count_histogram("Rings")

# Eight panels on a 4-column, 3-row grid; the third row stays empty.
gridExtra::grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, ncol = 4, nrow = 3)
Warning message:
"package 'ggplot2' was built under R version 3.4.4"
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
In [6]:
# install.packages("ggsci")  # one-time setup
library(ggsci)

# Count the observations per Sex level; naming the table dimension yields the
# columns "Sex" and "Freq" directly.
t <- as.data.frame(table(Sex = Abalone$Sex))
t

# Bar chart of the counts, one bar per Sex level, coloured with the NEJM
# palette from ggsci.
g <- ggplot(t, aes(x = Sex, y = Freq, fill = Sex)) +
  geom_col() +
  scale_fill_nejm()

# Draw the single chart in one cell of a 3 x 3 grid.
gridExtra::grid.arrange(g, ncol = 3, nrow = 3)
Warning message:
"package 'ggsci' was built under R version 3.4.4"
Sex Freq
F   1307
I   1342
M   1528
In [7]:
# Scatter-plot matrix of every column except the first one (Sex).
# The title is Japanese for "Scatter plot of the abalone data".
x <- Abalone[, -1]
pairs(x, main = "アワビデータの散布図", pch = 21)
In [8]:
# Pairwise correlation matrix of every column except the first one (Sex).
cor(Abalone[,-1])
        Length    Diam      Height    Whole     Shucked   Viscera   Shell     Rings
Length  1.0000000 0.9868116 0.8275536 0.9252612 0.8979137 0.9030177 0.8977056 0.5567196
Diam    0.9868116 1.0000000 0.8336837 0.9254521 0.8931625 0.8997244 0.9053298 0.5746599
Height  0.8275536 0.8336837 1.0000000 0.8192208 0.7749723 0.7983193 0.8173380 0.5574673
Whole   0.9252612 0.9254521 0.8192208 1.0000000 0.9694055 0.9663751 0.9553554 0.5403897
Shucked 0.8979137 0.8931625 0.7749723 0.9694055 1.0000000 0.9319613 0.8826171 0.4208837
Viscera 0.9030177 0.8997244 0.7983193 0.9663751 0.9319613 1.0000000 0.9076563 0.5038192
Shell   0.8977056 0.9053298 0.8173380 0.9553554 0.8826171 0.9076563 1.0000000 0.6275740
Rings   0.5567196 0.5746599 0.5574673 0.5403897 0.4208837 0.5038192 0.6275740 1.0000000
In [9]:
# Exclude observations whose Height is 0.4 or more.
Abalone2 <- Abalone[!(Abalone$Height >= 0.4), ]
dim(Abalone2)
head(Abalone2)
  1. 4175
  2. 9
Sex Length Diam  Height Whole  Shucked Viscera Shell Rings
M   0.455  0.365 0.095  0.5140 0.2245  0.1010  0.150 15
M   0.350  0.265 0.090  0.2255 0.0995  0.0485  0.070 7
F   0.530  0.420 0.135  0.6770 0.2565  0.1415  0.210 9
M   0.440  0.365 0.125  0.5160 0.2155  0.1140  0.155 10
I   0.330  0.255 0.080  0.2050 0.0895  0.0395  0.055 7
I   0.425  0.300 0.095  0.3515 0.1410  0.0775  0.120 8
In [10]:
# Scatter-plot matrix of the height-filtered data, again without the first
# column (Sex). The title is Japanese for "Scatter plot of the abalone data".
x <- Abalone2[, -1]
pairs(x, main = "アワビデータの散布図", pch = 21)
In [ ]: