# Load the wine-quality CSV into a data frame and take a first look at it.
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running the script on another machine.
wine_quality_red <- read.csv("c:/rwork/winequality-red2.csv", header = TRUE)
dim(wine_quality_red) # number of rows and columns

# Preview the first five rows in two halves (columns 1-6, then 7-12),
# presumably to keep the printed output narrow. head() stops at the last
# available row, whereas indexing with 1:5 pads the output with all-NA rows
# when the file holds fewer than five observations.
head(wine_quality_red[, 1:6], 5)
head(wine_quality_red[, 7:12], 5)
# Scatter plots of quality against the other variables
library(ggplot2)
#install.packages("gridExtra")
library(gridExtra)
# Scatter plots of quality against six of the measured variables, laid out on
# a 3 x 2 grid.
#
# `qualityf` is a factor copy of `quality`; mapping colour to a factor gives
# each quality score its own discrete colour.
wine_quality_red$qualityf <- factor(wine_quality_red$quality)

# qplot() is deprecated as of ggplot2 3.4.0 and warns on every call, so each
# panel is built with the equivalent ggplot() + geom_point().
p1 <- ggplot(wine_quality_red,
             aes(volatile_acidity, quality, colour = qualityf)) +
  geom_point()
p2 <- ggplot(wine_quality_red,
             aes(chlorides, quality, colour = qualityf)) +
  geom_point()
p3 <- ggplot(wine_quality_red,
             aes(total_sulfur_dioxide, quality, colour = qualityf)) +
  geom_point()
p4 <- ggplot(wine_quality_red,
             aes(ph, quality, colour = qualityf)) +
  geom_point()
p5 <- ggplot(wine_quality_red,
             aes(sulphates, quality, colour = qualityf)) +
  geom_point()
p6 <- ggplot(wine_quality_red,
             aes(alcohol, quality, colour = qualityf)) +
  geom_point()

gridExtra::grid.arrange(p1, p2, p3, p4, p5, p6, nrow = 3, ncol = 2)
# Scatter plots of quality against the remaining five measured variables,
# coloured by the `qualityf` factor column added to the data frame earlier in
# the script. Five panels on a 3 x 2 grid leave the last cell empty.
#
# qplot() is deprecated as of ggplot2 3.4.0 and warns on every call, so each
# panel is built with the equivalent ggplot() + geom_point().
p7 <- ggplot(wine_quality_red,
             aes(fixed_acidity, quality, colour = qualityf)) +
  geom_point()
p8 <- ggplot(wine_quality_red,
             aes(citric_acid, quality, colour = qualityf)) +
  geom_point()
p9 <- ggplot(wine_quality_red,
             aes(residual_sugar, quality, colour = qualityf)) +
  geom_point()
p10 <- ggplot(wine_quality_red,
              aes(free_sulfur_dioxide, quality, colour = qualityf)) +
  geom_point()
p11 <- ggplot(wine_quality_red,
              aes(density, quality, colour = qualityf)) +
  geom_point()

gridExtra::grid.arrange(p7, p8, p9, p10, p11, nrow = 3, ncol = 2)
# Remove outlier observations from the data.
#
# A row is kept only when every measurement listed here is strictly below its
# upper limit.
upper_limits <- c(
  volatile_acidity = 1.2,
  chlorides = 0.5,
  total_sulfur_dioxide = 200,
  ph = 3.8,
  sulphates = 1.5,
  alcohol = 14,
  fixed_acidity = 15,
  citric_acid = 0.9,
  residual_sugar = 9,
  free_sulfur_dioxide = 55,
  density = 1.002
)

# Fail loudly on a misnamed column instead of silently filtering to zero rows.
stopifnot(all(names(upper_limits) %in% names(wine_quality_red)))

# One logical vector per column, then combined element-wise with `&`.
below_limit <- Map(
  function(column, limit) wine_quality_red[[column]] < limit,
  names(upper_limits),
  upper_limits
)
keep_row <- Reduce(`&`, below_limit)

# which() discards NA comparisons; indexing rows with a logical vector that
# contains NA would otherwise insert all-NA rows into the result.
wine_quality_red2 <- wine_quality_red[which(keep_row), ]
dim(wine_quality_red2) # rows remaining after the filter