In [2]:
# Read the CSV into wine_quality_red; the first line of the file is used as
# the column names. TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
wine_quality_red <- read.csv("c:/rwork/winequality-red2.csv", header = TRUE)
# Number of rows and columns.
dim(wine_quality_red)
# Preview the first five rows in two column slices (1-6, then 7-12).
wine_quality_red[1:5, 1:6]
wine_quality_red[1:5, 7:12]
  1. 1599
  2. 12
fixed_acidity volatile_acidity citric_acid residual_sugar chlorides free_sulfur_dioxide
7.4 0.70 0.00 1.9 0.076 11
7.8 0.88 0.00 2.6 0.098 25
7.8 0.76 0.04 2.3 0.092 15
11.2 0.28 0.56 1.9 0.075 17
7.4 0.70 0.00 1.9 0.076 11
total_sulfur_dioxide density ph sulphates alcohol quality
34 0.9978 3.51 0.56 9.4 5
67 0.9968 3.20 0.68 9.8 5
54 0.9970 3.26 0.65 9.8 5
60 0.9980 3.16 0.58 9.8 6
34 0.9978 3.51 0.56 9.4 5
In [2]:
# Scatter plots of quality against the other variables
library(ggplot2)
# install.packages("gridExtra")
library(gridExtra)
# Factor copy of quality, used as the colour aesthetic in every plot below.
wine_quality_red$qualityf <- factor(wine_quality_red$quality)
# Each plot puts one predictor on x and quality on y, coloured by qualityf.
# ggplot() + geom_point() is used instead of qplot(), which is deprecated
# as of ggplot2 3.4.0.
p1 <- ggplot(wine_quality_red,
             aes(volatile_acidity, quality, colour = qualityf)) +
  geom_point()
p2 <- ggplot(wine_quality_red,
             aes(chlorides, quality, colour = qualityf)) +
  geom_point()
p3 <- ggplot(wine_quality_red,
             aes(total_sulfur_dioxide, quality, colour = qualityf)) +
  geom_point()
p4 <- ggplot(wine_quality_red,
             aes(ph, quality, colour = qualityf)) +
  geom_point()
p5 <- ggplot(wine_quality_red,
             aes(sulphates, quality, colour = qualityf)) +
  geom_point()
p6 <- ggplot(wine_quality_red,
             aes(alcohol, quality, colour = qualityf)) +
  geom_point()
# Lay the six plots out on a 3 x 2 grid.
gridExtra::grid.arrange(p1, p2, p3, p4, p5, p6, nrow = 3, ncol = 2)
In [3]:
# Scatter plots of quality against five further variables, coloured by the
# qualityf column of wine_quality_red. ggplot() + geom_point() is used
# instead of qplot(), which is deprecated as of ggplot2 3.4.0.
p7 <- ggplot(wine_quality_red,
             aes(fixed_acidity, quality, colour = qualityf)) +
  geom_point()
p8 <- ggplot(wine_quality_red,
             aes(citric_acid, quality, colour = qualityf)) +
  geom_point()
p9 <- ggplot(wine_quality_red,
             aes(residual_sugar, quality, colour = qualityf)) +
  geom_point()
p10 <- ggplot(wine_quality_red,
              aes(free_sulfur_dioxide, quality, colour = qualityf)) +
  geom_point()
p11 <- ggplot(wine_quality_red,
              aes(density, quality, colour = qualityf)) +
  geom_point()
# Five plots on a 3 x 2 grid; the last cell stays empty.
gridExtra::grid.arrange(p7, p8, p9, p10, p11, nrow = 3, ncol = 2)
In [4]:
# Remove outlier observations from the data: a row is kept only when every
# variable listed below is strictly under its cutoff.
# with() evaluates the column names inside wine_quality_red, so the data
# frame name does not have to be repeated for each condition.
is_inlier <- with(
  wine_quality_red,
  volatile_acidity < 1.2 &
    chlorides < 0.5 &
    total_sulfur_dioxide < 200 &
    ph < 3.8 &
    sulphates < 1.5 &
    alcohol < 14 &
    fixed_acidity < 15 &
    citric_acid < 0.9 &
    residual_sugar < 9 &
    free_sulfur_dioxide < 55 &
    density < 1.002
)
# which() keeps only the positions that are TRUE. Indexing with the raw
# logical mask would insert an all-NA row for every NA in the mask (i.e. for
# rows with a missing value in one of the tested columns).
wine_quality_red2 <- wine_quality_red[which(is_inlier), ]
# Number of rows and columns remaining after the filter.
dim(wine_quality_red2)
  1. 1543
  2. 13