My Notes

統計学とかR(R言語)とかPython3の覚え書きとか走り書きとか。 座右の銘にしたい: All work and no play makes Jack a dull boy.

『品質管理のための統計手法』の第1章 p.27~29のヒストグラムの作成をR(R言語)で

Rコード

# Build a histogram by the textbook procedure: choose a provisional number of
# class intervals, derive the class width and the first class limit from the
# data, then lay out equal-width classes that cover every observation.
# All class-interval quantities are computed from `df$n`, so the script keeps
# working when the measurements are replaced.

# The 50 measurements, stored in the single column `n`.
df <- data.frame(n = c(36, 15, 27, 20, 23,
                       35, 27, 24, 30, 42,
                       38, 34, 15, 24, 38,
                       24, 43, 23, 44, 19,
                       14, 20, 29, 30, 35,
                       35, 25, 24, 34, 31,
                       12, 18, 22, 15, 37,
                       29, 27, 44, 18, 28,
                       25, 21, 37, 19, 33,
                       22, 31, 24, 36, 23))

df

# Unit of measurement (smallest increment of the measurements): m = 1.0
unit_m <- 1.0

# Provisional number of class intervals: sqrt(number of observations)
# = sqrt(50), about 7
n_obs <- length(df$n)
sqrt(n_obs)
n_classes <- round(sqrt(n_obs))
n_classes

summary(df$n)

# Maximum
x_max <- max(df$n)
x_max

# Minimum
x_min <- min(df$n)
x_min

# Range
x_range <- x_max - x_min
x_range

# Class width = range / number of classes = 32 / 7 = 4.57, about 5.0
x_range / n_classes
# Round to a whole multiple of the measurement unit, and never below one unit
# so the break sequence always advances (e.g. when all values are equal).
class_width <- max(round(x_range / n_classes / unit_m) * unit_m, unit_m)
class_width

# Lower limit of the first class: Xmin - (m / 2) = 12 - (1.0 / 2) = 11.5
# The half-unit offset keeps every observation strictly inside a class.
lower_limit <- x_min - unit_m / 2
lower_limit

# Upper limit of the first class: lower limit + width = 11.5 + 5.0 = 16.5
lower_limit + class_width

# First class is [11.5, 16.5); continue with [16.5, 21.5), [21.5, 26.5), ...
# until the maximum is covered (up to 46.5 for this data).
n_bins <- ceiling((x_max - lower_limit) / class_width)
class_breaks <- lower_limit + class_width * seq(0, n_bins)

# Draw the histogram. `df$n` is passed directly so the plot title and
# h$xname stay "df$n".
h <- hist(df$n, breaks = class_breaks, right = FALSE)
h

round(mean(df$n), 1)
round(sd(df$n), 2)

R Console

> df <- data.frame(n = c(36, 15, 27, 20, 23,
+                        35, 27, 24, 30, 42,
+                        38, 34, 15, 24, 38,
+                        24, 43, 23, 44, 19,
+                        14, 20, 29, 30, 35,
+                        35, 25, 24, 34, 31,
+                        12, 18, 22, 15, 37,
+                        29, 27, 44, 18, 28,
+                        25, 21, 37, 19, 33,
+                        22, 31, 24, 36, 23))
> 
> df
    n
1  36
2  15
3  27
4  20
5  23
6  35
7  27
8  24
9  30
10 42
11 38
12 34
13 15
14 24
15 38
16 24
17 43
18 23
19 44
20 19
21 14
22 20
23 29
24 30
25 35
26 35
27 25
28 24
29 34
30 31
31 12
32 18
33 22
34 15
35 37
36 29
37 27
38 44
39 18
40 28
41 25
42 21
43 37
44 19
45 33
46 22
47 31
48 24
49 36
50 23
> 
> # 測定単位(測定の最小のきざみ) m = 1.0
> # 仮の区間数hを決める h = sqrt(n) = sqrt(50) 約7
> sqrt(length(df$n))
[1] 7.071068
> round(sqrt(length(df$n)))
[1] 7
> 
> summary(df$n)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  12.00   22.00   27.00   27.58   34.75   44.00 
> 
> # 最大値
> max(df$n)
[1] 44
> 
> # 最小値
> min(df$n)
[1] 12
> 
> # 範囲
> max(df$n) - min(df$n)
[1] 32
> 
> # 区間の幅 c = R / h = 32 / 7 = 4.57 約5.0
> (max(df$n) - min(df$n)) / round(sqrt(length(df$n)))
[1] 4.571429
> round((max(df$n) - min(df$n)) / round(sqrt(length(df$n))))
[1] 5
> 
> # 最初の区間の下限 Xmin - (m / 2) = 12 - (1.0 / 2) = 11.5
> min(df$n) - (1.0 / 2)
[1] 11.5
> 
> # 上限 11.5 + c = 11.5 + 5.0 = 16.5
> 11.5 + 5.0
[1] 16.5
> (min(df$n) - (1.0 / 2)) + round((max(df$n) - min(df$n)) /
+                                     round(sqrt(length(df$n))))
[1] 16.5
> 
> # 最初の区間 (11.5, 16.5)
> # (16.5, 21.5), (21.5, 26.5), ... (41.5, 46.5) まで
> # ヒストグラムの作成
> h <- hist(df$n, breaks = seq(11.5, 46.5, 5.0), right = FALSE)
> h
$breaks
[1] 11.5 16.5 21.5 26.5 31.5 36.5 41.5 46.5

$counts
[1]  5  7 12 10  8  4  4

$density
[1] 0.020 0.028 0.048 0.040 0.032 0.016 0.016

$mids
[1] 14 19 24 29 34 39 44

$xname
[1] "df$n"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"
> 
> round(mean(df$n), 1)
[1] 27.6
> round(sd(df$n), 2)
[1] 8.37

ヒストグラムスクリーンショット

[f:id:my_notes:20170528022640p:plain]

参考文献

品質管理のための統計手法 (日経文庫)

品質管理のための統計手法 (日経文庫)