Linearna regresija 7. prosinca 2012. > setwd("/home/marina/statisticki praktikum/vjezbe9") > forbes = read.table("forbes.dat") > hooker = read.table("hooker.dat") > forbes V1 V2 1 194.5 20.79 2 194.3 20.79 3 197.9 22.40 4 198.4 22.67 5 199.4 23.15 6 199.9 23.35 7 200.9 23.89 8 201.1 23.99 9 201.4 24.02 10 201.3 24.01 11 203.6 25.14 12 204.6 26.57 13 209.5 28.49 14 208.6 27.76 15 210.7 29.04 16 211.9 29.88 17 212.2 30.06 > hooker V1 V2 1 180.6 15.376 2 181.0 15.919 3 181.9 15.928 4 181.9 16.106 5 182.4 16.235 6 183.2 16.385 7 184.1 16.817 8 184.6 16.881 9 184.1 16.959 10 185.6 17.062 11 186.0 17.221 12 185.7 17.267 13 188.8 18.356 1
14 188.5 18.507 15 189.5 18.869 16 190.6 19.386 17 191.1 19.490 18 191.4 19.758 19 193.6 20.212 20 193.4 20.480 21 195.6 21.605 22 196.3 21.654 23 197.0 21.892 24 196.4 21.928 25 199.5 23.030 26 200.1 23.369 27 200.6 23.726 28 202.5 24.697 29 208.4 27.972 30 210.2 28.559 31 210.8 29.211 > x = c(forbes[, 2], hooker[, 2]) > y = c(forbes[, 1], hooker[, 1]) > x [1] 20.790 20.790 22.400 22.670 23.150 23.350 23.890 23.990 24.020 24.010 [11] 25.140 26.570 28.490 27.760 29.040 29.880 30.060 15.376 15.919 15.928 [21] 16.106 16.235 16.385 16.817 16.881 16.959 17.062 17.221 17.267 18.356 [31] 18.507 18.869 19.386 19.490 19.758 20.212 20.480 21.605 21.654 21.892 [41] 21.928 23.030 23.369 23.726 24.697 27.972 28.559 29.211 > y [1] 194.5 194.3 197.9 198.4 199.4 199.9 200.9 201.1 201.4 201.3 203.6 204.6 [13] 209.5 208.6 210.7 211.9 212.2 180.6 181.0 181.9 181.9 182.4 183.2 184.1 [25] 184.6 184.1 185.6 186.0 185.7 188.8 188.5 189.5 190.6 191.1 191.4 193.6 [37] 193.4 195.6 196.3 197.0 196.4 199.5 200.1 200.6 202.5 208.4 210.2 210.8 > n = length(x) > n [1] 48 kvadratični model za Forbesove i Hookerove podatke zajedno > X = cbind(rep(1, n), x, x^2) > thetakapa = solve(t(X) %*% X) %*% t(X) %*% y > thetakapa 127.19752846 x 4.11706047-0.04302614 2
> ykapa = function(x) rbind(c(1, x, x^2)) %*% thetakapa > ykapa = function(x) thetakapa[1] + thetakapa[2] * x + thetakapa[3] * + x^2 > ykapa(x) [1] 194.1943 194.1943 197.8309 198.4189 199.4488 199.8721 200.9977 201.2034 [9] 201.2649 201.2444 203.5071 206.2129 209.5691 208.3304 210.4721 211.8009 [17] 212.0778 180.3292 181.8336 181.8583 182.3458 182.6974 183.1044 184.2658 [25] 184.4365 184.6441 184.9174 185.3375 185.4586 188.2729 188.6551 189.5634 [33] 190.8409 191.0951 191.7459 192.8343 193.4685 196.0631 196.1736 196.7075 [41] 196.7879 199.1932 199.9121 200.6585 202.6331 208.6949 209.6838 210.7475 > plot(x, y, xlab = "atmosferski tlak", ylab = "vrelište") > curve(ykapa, add = T) reziduali > e = y - ykapa(x) > e [1] 0.305716985 0.105716985 0.069110667-0.018914922-0.048804339 [6] 0.027926624-0.097706529-0.103403441 0.135055288 0.055560440 [11] 0.092932148-1.612883846-0.069123455 0.269570010 0.227904745 [16] 0.099057857 0.122204551 0.270847881-0.833564878 0.041713858 [21] -0.445785645-0.297381883 0.095585927-0.165848979 0.163452412 [26] -0.544109960 0.682603403 0.662526122 0.241400067 0.527053313 [31] -0.155125882-0.063353343-0.240909802 0.004875049-0.345928302 [36] 0.765694845-0.068457736-0.463051499 0.126414649 0.292474886 [41] -0.387864702 0.306809286 0.187895098-0.058500735-0.133132086 [46] -0.294885105 0.516166634 0.052467339 koeficijent determinacije > SSE = sum(e^2) > SSE [1] 7.345996 > Syy = (n - 1) * var(y) > Syy [1] 4190.937 > R2 = 1 - SSE/Syy > R2 [1] 0.9982472 standardizirani reziduali > k = 2 > sigmakapa = sqrt(SSE/(n - k - 1)) > sigmakapa 3
[1] 0.4040351 > H = X %*% solve(t(X) %*% X) %*% t(X) > hii = diag(H) > hii [1] 0.03816211 0.03816211 0.04366972 0.04403765 0.04424915 0.04418415 [7] 0.04365905 0.04352148 0.04347866 0.04349300 0.04211561 0.04704348 [13] 0.09215738 0.06713647 0.11986428 0.18092796 0.19747691 0.13376688 [19] 0.09910513 0.09861201 0.08936258 0.08323740 0.07669464 0.06105079 [25] 0.05910575 0.05685580 0.05408109 0.05021658 0.04918931 0.03469330 [31] 0.03388174 0.03279870 0.03291404 0.03311727 0.03384766 0.03557042 [37] 0.03675029 0.04155088 0.04172242 0.04248436 0.04258875 0.04424698 [43] 0.04417379 0.04386273 0.04249041 0.07319933 0.09517463 0.13031517 > es = e/(sigmakapa * sqrt(1 - hii)) > es [1] 0.77152419 0.26679319 0.17491298-0.04788121-0.12355690 0.07069883 [7] -0.24728520-0.26168468 0.34177885 0.14060557 0.23501217-4.08928584 [13] -0.17955648 0.69078585 0.60125605 0.27089987 0.33762883 0.72025946 [19] -2.17361824 0.10874402-1.15620343-0.76871788 0.24620802-0.42361631 [25] 0.41706317-1.38668733 1.73709037 1.68256415 0.61273301 1.32770871 [31] -0.39061601-0.15943807-0.60632133 0.01227080-0.87105225 1.92975147 [37] -0.17263705-1.17064626 0.31961905 0.73976974-0.98109692 0.77674162 [43] 0.47567125-0.14807513-0.33673797-0.75812460 1.34303850 0.13924801 > par(mfrow = c(1, 2)) > plot(ykapa(x), e, main = "Graf reziduala") > abline(0, 0) > plot(ykapa(x), es, main = "Graf standardiziranih reziduala") > abline(0, 0) provjera pripadnosti standardiziranih reziduala N(0,1) distribuciji > i = 1:n > x_es = qnorm((i - 3/8)/(n + 1/4)) > x_es [1] -2.22760697-1.82928107-1.60357262-1.43861700-1.30553762-1.19225666 [7] -1.09250453-1.00258285-0.92011445-0.84347341-0.77149344-0.70330566 [13] -0.63824215-0.57577564-0.51548004-0.45700369-0.40005070-0.34436747 [19] -0.28973272-0.23594996-0.18284163-0.13024441-0.07800530-0.02597834 [25] 0.02597834 0.07800530 0.13024441 0.18284163 0.23594996 0.28973272 [31] 0.34436747 0.40005070 0.45700369 0.51548004 0.57577564 0.63824215 [37] 0.70330566 0.77149344 
0.84347341 0.92011445 1.00258285 1.09250453 [43] 1.19225666 1.30553762 1.43861700 1.60357262 1.82928107 2.22760697 > y_es = sort(es) > y_es 4
[1] -4.08928584-2.17361824-1.38668733-1.17064626-1.15620343-0.98109692 [7] -0.87105225-0.76871788-0.75812460-0.60632133-0.42361631-0.39061601 [13] -0.33673797-0.26168468-0.24728520-0.17955648-0.17263705-0.15943807 [19] -0.14807513-0.12355690-0.04788121 0.01227080 0.07069883 0.10874402 [25] 0.13924801 0.14060557 0.17491298 0.23501217 0.24620802 0.26679319 [31] 0.27089987 0.31961905 0.33762883 0.34177885 0.41706317 0.47567125 [37] 0.60125605 0.61273301 0.69078585 0.72025946 0.73976974 0.77152419 [43] 0.77674162 1.32770871 1.34303850 1.68256415 1.73709037 1.92975147 > par(mfrow = c(1, 1)) > plot(x_es, y_es, main = "Normalni vjerojatnosni graf") > abline(0, 1) > d = max(max(abs((i - 1)/n - pnorm(y_es)), abs(i/n - pnorm(y_es)))) > d [1] 0.1275895 > ks.test(es, pnorm, 0, 1) One-sample Kolmogorov-Smirnov test data: es D = 0.1276, p-value = 0.3829 alternative hypothesis: two-sided test prihvatljivosti linearnog u odnosu na kvadratični model > Xr = X[, 1:2] > Xr x [1,] 1 20.790 [2,] 1 20.790 [3,] 1 22.400 [4,] 1 22.670 [5,] 1 23.150 [6,] 1 23.350 [7,] 1 23.890 [8,] 1 23.990 [9,] 1 24.020 [10,] 1 24.010 [11,] 1 25.140 [12,] 1 26.570 [13,] 1 28.490 [14,] 1 27.760 [15,] 1 29.040 [16,] 1 29.880 [17,] 1 30.060 [18,] 1 15.376 [19,] 1 15.919 [20,] 1 15.928 [21,] 1 16.106 5
[22,] 1 16.235 [23,] 1 16.385 [24,] 1 16.817 [25,] 1 16.881 [26,] 1 16.959 [27,] 1 17.062 [28,] 1 17.221 [29,] 1 17.267 [30,] 1 18.356 [31,] 1 18.507 [32,] 1 18.869 [33,] 1 19.386 [34,] 1 19.490 [35,] 1 19.758 [36,] 1 20.212 [37,] 1 20.480 [38,] 1 21.605 [39,] 1 21.654 [40,] 1 21.892 [41,] 1 21.928 [42,] 1 23.030 [43,] 1 23.369 [44,] 1 23.726 [45,] 1 24.697 [46,] 1 27.972 [47,] 1 28.559 [48,] 1 29.211 > thetakapar = solve(t(Xr) %*% Xr) %*% t(Xr) %*% y > thetakapar 148.125850 x 2.183258 > ykapar = function(x) thetakapar[1] + thetakapar[2] * x > er = y - ykapar(x) > er [1] 0.98421060 0.78421060 0.86916482 0.77968510 0.73172114 0.79506949 [7] 0.61611003 0.59778421 0.83228646 0.75411904 0.58703722-1.53502207 [13] -0.82687791-0.13309939-0.82766995-1.46160688-1.55459336-1.09562924 [19] -1.88113847-1.00078779-1.38940776-1.17104808-0.69853681-0.74170438 [25] -0.38143290-1.05172705 0.22339735 0.27625929-0.12417059 0.59826118 [31] -0.03141082 0.17824970 0.14950518 0.42244633 0.13733311 1.34613387 [37] 0.56102066 0.30485513 0.89787548 1.07826001 0.39966272 1.09371213 [43] 0.95358758 0.67416438 0.45422063-0.79595014-0.27752273-1.10100711 > SSEr = sum(er^2) > SSEr [1] 35.87494 6
> m = 1 > f = ((SSEr - SSE)/(k - m))/(SSE/(n - k - 1)) > f [1] 174.7622 > pv = 1 - pf(f, k - m, n - k - 1) > pv [1] 0 p.i. za parametre kvadratičnog modela > alfa = 0.05 > t = qt(1 - alfa/2, n - k - 1) > t [1] 2.014103 > C = solve(t(X) %*% X) > cjj = diag(C) > cjj x 1.591914e+01 1.322278e-01 6.489019e-05 > cbind(thetakapa - t * sigmakapa * sqrt(cjj), thetakapa + t * + sigmakapa * sqrt(cjj)) [,2] 123.9506902 130.44436676 x 3.8211487 4.41297223-0.0495814-0.03647087 donja i gornja krivulja p.i. za srednju vrijednost od Y uz dano x > f_donji = function(x0) rbind(c(1, x0, x0^2)) %*% thetakapa - + t * sigmakapa * sqrt(rbind(c(1, x0, x0^2)) %*% solve(t(X) %*% + X) %*% t(rbind(c(1, x0, x0^2)))) > f_gornji = function(x0) rbind(c(1, x0, x0^2)) %*% thetakapa + + t * sigmakapa * sqrt(rbind(c(1, x0, x0^2)) %*% solve(t(X) %*% + X) %*% t(rbind(c(1, x0, x0^2)))) > plot(x, y, xlab = "atmosferski tlak", ylab = "vrelište", main = "Donja i gornja krivulja") > curve(ykapa, add = T) > x_os = seq(min(x), max(x), length = 50) > pi_donji = c() > for (i in 1:50) pi_donji = c(pi_donji, f_donji(x_os[i])) > pi_donji [1] 180.0315 180.8885 181.7360 182.5739 183.4022 184.2207 185.0293 185.8280 [9] 186.6166 187.3953 188.1640 188.9231 189.6728 190.4133 191.1450 191.8682 [17] 192.5831 193.2901 193.9892 194.6807 195.3648 196.0414 196.7107 197.3726 [25] 198.0273 198.6747 199.3148 199.9476 200.5729 201.1907 201.8009 202.4033 [33] 202.9977 203.5841 204.1620 204.7314 205.2919 205.8434 206.3856 206.9183 [41] 207.4414 207.9548 208.4585 208.9526 209.4370 209.9118 210.3770 210.8328 [49] 211.2792 211.7162 7
> pi_gornji = c() > for (i in 1:50) pi_gornji = c(pi_gornji, f_gornji(x_os[i])) > pi_gornji [1] 180.6268 181.4366 182.2404 183.0384 183.8306 184.6170 185.3980 186.1734 [9] 186.9434 187.7078 188.4668 189.2199 189.9670 190.7079 191.4420 192.1693 [17] 192.8893 193.6019 194.3068 195.0038 195.6930 196.3740 197.0470 197.7118 [25] 198.3684 199.0169 199.6572 200.2894 200.9136 201.5298 202.1382 202.7389 [33] 203.3321 203.9180 204.4968 205.0688 205.6341 206.1930 206.7458 207.2926 [41] 207.8335 208.3686 208.8980 209.4216 209.9394 210.4514 210.9574 211.4575 [49] 211.9515 212.4394 > lines(x_os, pi_donji, col = "red", lty = 3) > lines(x_os, pi_gornji, col = "green", lty = 3) donja i gornja krivulja p.i. za Y uz dano x > f_donji = function(x0) rbind(c(1, x0, x0^2)) %*% thetakapa - + t * sigmakapa * sqrt(1 + rbind(c(1, x0, x0^2)) %*% solve(t(X) %*% + X) %*% t(rbind(c(1, x0, x0^2)))) > f_gornji = function(x0) rbind(c(1, x0, x0^2)) %*% thetakapa + + t * sigmakapa * sqrt(1 + rbind(c(1, x0, x0^2)) %*% solve(t(X) %*% + X) %*% t(rbind(c(1, x0, x0^2)))) > pi_donji = c() > for (i in 1:50) pi_donji = c(pi_donji, f_donji(x_os[i])) > pi_donji [1] 179.4627 180.3039 181.1363 181.9599 182.7749 183.5813 184.3793 185.1688 [9] 185.9500 186.7229 187.4877 188.2443 188.9930 189.7336 190.4663 191.1911 [17] 191.9082 192.6174 193.3189 194.0126 194.6987 195.3771 196.0479 196.7110 [25] 197.3664 198.0142 198.6544 199.2870 199.9118 200.5290 201.1385 201.7402 [33] 202.3342 202.9203 203.4986 204.0690 204.6314 205.1859 205.7322 206.2704 [41] 206.8004 207.3221 207.8354 208.3402 208.8365 209.3243 209.8033 210.2735 [49] 210.7349 211.1873 > pi_gornji = c() > for (i in 1:50) pi_gornji = c(pi_gornji, f_gornji(x_os[i])) > pi_gornji [1] 181.1956 182.0212 182.8402 183.6524 184.4579 185.2564 186.0480 186.8326 [9] 187.6100 188.3802 189.1431 189.8987 190.6469 191.3876 192.1207 192.8463 [17] 193.5643 194.2745 194.9771 195.6719 196.3590 197.0383 197.7098 198.3735 [25] 199.0293 199.6774 200.3176 200.9500 201.5746 202.1915 
202.8006 203.4020 [33] 203.9957 204.5818 205.1602 205.7311 206.2946 206.8505 207.3991 207.9404 [41] 208.4745 209.0014 209.5212 210.0340 210.5399 211.0389 211.5312 212.0168 [49] 212.4958 212.9683 > lines(x_os, pi_donji, col = "red", lty = 2) > lines(x_os, pi_gornji, col = "green", lty = 2) 2. nacin korištenjem naredbe lm 8
> x_sort = x[order(x)] > y_sort = y[order(x)] > x_sortkv = x_sort^2 > regr = lm(y_sort ~ x_sort + x_sortkv) > regr Call: lm(formula = y_sort ~ x_sort + x_sortkv) Coefficients: (Intercept) x_sort x_sortkv 127.19753 4.11706-0.04303 > coefficients(regr) (Intercept) x_sort x_sortkv 127.19752846 4.11706047-0.04302614 > fitted(regr) 1 2 3 4 5 6 7 8 180.3292 181.8336 181.8583 182.3458 182.6974 183.1044 184.2658 184.4365 9 10 11 12 13 14 15 16 184.6441 184.9174 185.3375 185.4586 188.2729 188.6551 189.5634 190.8409 17 18 19 20 21 22 23 24 191.0951 191.7459 192.8343 193.4685 194.1943 194.1943 196.0631 196.1736 25 26 27 28 29 30 31 32 196.7075 196.7879 197.8309 198.4189 199.1932 199.4488 199.8721 199.9121 33 34 35 36 37 38 39 40 200.6585 200.9977 201.2034 201.2444 201.2649 202.6331 203.5071 206.2129 41 42 43 44 45 46 47 48 208.3304 208.6949 209.5691 209.6838 210.4721 210.7475 211.8009 212.0778 > residuals(regr) 1 2 3 4 5 6 0.270847881-0.833564878 0.041713858-0.445785645-0.297381883 0.095585927 7 8 9 10 11 12-0.165848979 0.163452412-0.544109960 0.682603403 0.662526122 0.241400067 13 14 15 16 17 18 0.527053313-0.155125882-0.063353343-0.240909802 0.004875049-0.345928302 19 20 21 22 23 24 0.765694845-0.068457736 0.305716985 0.105716985-0.463051499 0.126414649 25 26 27 28 29 30 0.292474886-0.387864702 0.069110667-0.018914922 0.306809286-0.048804339 31 32 33 34 35 36 0.027926624 0.187895098-0.058500735-0.097706529-0.103403441 0.055560440 37 38 39 40 41 42 0.135055288-0.133132086 0.092932148-1.612883845 0.269570010-0.294885105 43 44 45 46 47 48-0.069123455 0.516166634 0.227904745 0.052467339 0.099057857 0.122204551 9
> rstandard(regr) 1 2 3 4 5 6 0.72025946-2.17361824 0.10874402-1.15620343-0.76871788 0.24620802 7 8 9 10 11 12-0.42361631 0.41706318-1.38668733 1.73709037 1.68256415 0.61273301 13 14 15 16 17 18 1.32770871-0.39061601-0.15943807-0.60632133 0.01227080-0.87105225 19 20 21 22 23 24 1.92975147-0.17263705 0.77152419 0.26679319-1.17064626 0.31961905 25 26 27 28 29 30 0.73976974-0.98109692 0.17491298-0.04788121 0.77674162-0.12355690 31 32 33 34 35 36 0.07069883 0.47567125-0.14807513-0.24728520-0.26168468 0.14060557 37 38 39 40 41 42 0.34177885-0.33673797 0.23501217-4.08928584 0.69078585-0.75812459 43 44 45 46 47 48-0.17955648 1.34303850 0.60125605 0.13924801 0.27089987 0.33762883 > summary(regr) Call: lm(formula = y_sort ~ x_sort + x_sortkv) Residuals: Min 1Q Median 3Q Max -1.61288-0.13863 0.04709 0.19790 0.76569 Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) 127.197528 1.612051 78.90 <2e-16 *** x_sort 4.117060 0.146920 28.02 <2e-16 *** x_sortkv -0.043026 0.003255-13.22 <2e-16 *** --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05. 0.1 1 Residual standard error: 0.404 on 45 degrees of freedom Multiple R-squared: 0.9982, Adjusted R-squared: 0.9982 F-statistic: 1.281e+04 on 2 and 45 DF, p-value: < 2.2e-16 > pc = predict(regr, int = "c") > pc fit lwr upr 1 180.3292 180.0315 180.6268 2 181.8336 181.5774 182.0897 3 181.8583 181.6027 182.1138 4 182.3458 182.1025 182.5891 5 182.6974 182.4626 182.9322 6 183.1044 182.8791 183.3298 7 184.2658 184.0648 184.4669 10
8 184.4365 184.2387 184.6344 9 184.6441 184.4501 184.8381 10 184.9174 184.7282 185.1066 11 185.3375 185.1551 185.5198 12 185.4586 185.2781 185.6391 13 188.2729 188.1214 188.4245 14 188.6551 188.5053 188.8049 15 189.5634 189.4160 189.7107 16 190.8409 190.6933 190.9885 17 191.0951 190.9470 191.2432 18 191.7459 191.5962 191.8956 19 192.8343 192.6808 192.9878 20 193.4685 193.3125 193.6245 21 194.1943 194.0353 194.3533 22 194.1943 194.0353 194.3533 23 196.0631 195.8972 196.2289 24 196.1736 196.0074 196.3398 25 196.7075 196.5398 196.8753 26 196.7879 196.6199 196.9558 27 197.8309 197.6608 198.0009 28 198.4189 198.2481 198.5897 29 199.1932 199.0220 199.3644 30 199.4488 199.2776 199.6200 31 199.8721 199.7010 200.0431 32 199.9121 199.7411 200.0831 33 200.6585 200.4881 200.8289 34 200.9977 200.8277 201.1677 35 201.2034 201.0336 201.3732 36 201.2444 201.0747 201.4142 37 201.2649 201.0953 201.4346 38 202.6331 202.4654 202.8009 39 203.5071 203.3401 203.6741 40 206.2129 206.0364 206.3894 41 208.3304 208.1196 208.5413 42 208.6949 208.4747 208.9151 43 209.5691 209.3221 209.8162 44 209.6838 209.4328 209.9349 45 210.4721 210.1904 210.7538 46 210.7475 210.4538 211.0413 47 211.8009 211.4548 212.1471 48 212.0778 211.7162 212.4394 > pp = predict(regr, int = "p") > pp fit lwr upr 1 180.3292 179.4627 181.1956 2 181.8336 180.9804 182.6867 3 181.8583 181.0053 182.7112 4 182.3458 181.4964 183.1951 11
5 182.6974 181.8504 183.5443 6 183.1044 182.2600 183.9488 7 184.2658 183.4276 185.1041 8 184.4365 183.5991 185.2740 9 184.6441 183.8075 185.4807 10 184.9174 184.0819 185.7529 11 185.3375 184.5035 186.1714 12 185.4586 184.6251 186.2921 13 188.2729 187.4452 189.1007 14 188.6551 187.8277 189.4826 15 189.5634 188.7363 190.3904 16 190.8409 190.0139 191.6680 17 191.0951 190.2680 191.9223 18 191.7459 190.9185 192.5734 19 192.8343 192.0062 193.6624 20 193.4685 192.6399 194.2970 21 194.1943 193.3651 195.0234 22 194.1943 193.3651 195.0234 23 196.0631 195.2325 196.8936 24 196.1736 195.3430 197.0042 25 196.7075 195.8767 197.5384 26 196.7879 195.9569 197.6188 27 197.8309 196.9995 198.6622 28 198.4189 197.5874 199.2504 29 199.1932 198.3616 200.0248 30 199.4488 198.6172 200.2804 31 199.8721 199.0405 200.7036 32 199.9121 199.0806 200.7437 33 200.6585 199.8271 201.4899 34 200.9977 200.1664 201.8290 35 201.2034 200.3721 202.0347 36 201.2444 200.4132 202.0757 37 201.2649 200.4337 202.0962 38 202.6331 201.8023 203.4640 39 203.5071 202.6763 204.3378 40 206.2129 205.3802 207.0456 41 208.3304 207.4898 209.1711 42 208.6949 207.8519 209.5379 43 209.5691 208.7187 210.4196 44 209.6838 208.8322 210.5354 45 210.4721 209.6109 211.3333 46 210.7475 209.8824 211.6127 47 211.8009 210.9166 212.6853 48 212.0778 211.1873 212.9683 > par(mfrow = c(1, 1)) > plot(x_sort, y_sort) > matlines(x_sort, pc, lty = c(1, 3, 3)) > matlines(x_sort, pp, lty = c(1, 2, 2)) usporedba triju modela za Forbesove podatke 12
> xf = forbes[, 2] > yf = forbes[, 1] > zf = log10(xf) > nf = length(xf) > nf [1] 17 > X1 = cbind(rep(1, nf), xf) > X2 = cbind(rep(1, nf), xf, xf^2) > X3 = cbind(rep(1, nf), zf) > thetakapa1 = solve(t(X1) %*% X1) %*% t(X1) %*% yf > thetakapa2 = solve(t(X2) %*% X2) %*% t(X2) %*% yf > thetakapa3 = solve(t(X3) %*% X3) %*% t(X3) %*% yf > thetakapa1 155.296483 xf 1.901784 > thetakapa2 140.82726246 xf 3.04759260-0.02237667 > thetakapa3 47.86384 zf 111.09210 > ykapa1 = function(x) thetakapa1[1] + thetakapa1[2] * x > ykapa2 = function(x) thetakapa2[1] + thetakapa2[2] * x + thetakapa2[3] * + x^2 > ykapa3 = function(x) thetakapa3[1] + thetakapa3[2] * x krivulje regresije > par(mfrow = c(1, 3)) > plot(xf, yf, xlab = "atmosferski tlak", ylab = "vrelište", main = "Linearni model") > curve(ykapa1, add = T) > plot(xf, yf, xlab = "atmosferski tlak", ylab = "vrelište", main = "Kvadratični model") > curve(ykapa2, add = T) > plot(zf, yf, xlab = "atmosferski tlak", ylab = "vrelište", main = "Linearni model na transformiranim podacima") > curve(ykapa3, add = T) koeficijenti determinacije > e1 = yf - ykapa1(xf) > e2 = yf - ykapa2(xf) > e3 = yf - ykapa3(zf) 13
> SSE1 = sum(e1^2) > SSE2 = sum(e2^2) > SSE3 = sum(e3^2) > Syy = (nf - 1) * var(yf) > Rkv1 = 1 - SSE1/Syy > Rkv2 = 1 - SSE2/Syy > Rkv3 = 1 - SSE3/Syy > Rkv1 [1] 0.9944282 > Rkv2 [1] 0.9953363 > Rkv3 [1] 0.9949606 grafovi reziduala > par(mfrow = c(1, 3)) > plot(ykapa1(xf), e1, main = "Linearni model\n graf reziduala") > abline(0, 0) > plot(ykapa2(xf), e2, main = "Kvadratični model\n graf reziduala") > abline(0, 0) > plot(ykapa3(zf), e3, main = "Linearni model na transformiranim podacima\n graf reziduala") > abline(0, 0) Durbin-Watsonov test o nezavisnosti sl. grešaka za linearni model na transformiranim podacima > i = 2:nf > d = sum((e3[i] - e3[i - 1])^2)/sum(e3^2) > d [1] 2.031462 > nf [1] 17 > r = 1 > alfa = 0.05 > c1 = 1.13 > d1 = 1.38 > c2 = 4 - c1 > c2 [1] 2.87 > d2 = 4 - d1 > d2 14
[1] 2.62 > sort(c(c1 = c1, d1 = d1, c2 = c2, d2 = d2, d = d)) c1 d1 d d2 c2 1.130000 1.380000 2.031462 2.620000 2.870000 pouzdano područje za parametar linearnog modela na transformiranim podacima > q = qf(1-0.05, 2, nf - 2) > q [1] 3.68232 > B = t(X3) %*% X3 > B zf 17.00000 23.73269 zf 23.73269 33.17460 > sigmakapa3 = sqrt(SSE3/(nf - 2)) > sigmakapa3 [1] 0.4222813 > A = B/(2 * sigmakapa3^2) > A zf 47.66672 66.54468 zf 66.54468 93.01907 > D = diag(eigen(A)$values) > D [,2] [1,] 140.6451 0.00000000 [2,] 0.0000 0.04066426 > V = eigen(A)$vectors > V [,2] [1,] 0.5819997-0.8131890 [2,] 0.8131890 0.5819997 > V %*% D %*% t(V) [,2] [1,] 47.66672 66.54468 [2,] 66.54468 93.01907 15
> b = -B %*% thetakapa3/(sigmakapa3^2) > b -19348.20 zf -27037.54 > bc = t(V) %*% b > bc [1,] -33247.273055 [2,] -2.092845 > c = t(thetakapa3) %*% B %*% thetakapa3/(2 * sigmakapa3^2) - q > c [1,] 1964864 > sx = -bc[1]/(2 * D[1, 1]) > sx [1] 118.1956 > sy = -bc[2]/(2 * D[2, 2]) > sy [1] 25.73322 > cnovi = c - D[1, 1] * sx^2 - D[2, 2] * sy^2 > cnovi [1,] -3.68232 > rx = sqrt(-cnovi/D[1, 1]) > rx [1,] 0.1618074 > ry = sqrt(-cnovi/D[2, 2]) > ry [1,] 9.515998 > phi = seq(0, 2 * pi, length = 50) > xe = sx + rx * cos(phi) > ye = sy + ry * sin(phi) > par(mfrow = c(1, 1)) > plot(xe, ye, type = "l") > xp = V[1, 1] * xe + V[1, 2] * ye > yp = V[2, 1] * xe + V[2, 2] * ye > plot(xp, yp, type = "l", main = "Pouzdano podrucje") 16