Code aus der Vorlesung

Vorlesung vom 21.11.2025

1+1
[1] 2
# Das hier ist ein Logarthmus
log(8, base = 2)
[1] 3
# Zugverspätungen anlegen
# Erstellen eines Vektors mit den Verspätungen
verspaetungen <- c(10, 20, 5, 10, 30, 
                   25, 5, 5, 10, 20, 
                   15, 10, 5, 20, 15, 
                   10, 5, 20, 25, 10)  

#Tabelle mit absoluten Häufigkeiten
table(verspaetungen)
verspaetungen
 5 10 15 20 25 30 
 5  6  2  4  2  1 
#Tabelle mit relativen Häufigkeiten
prop.table(table(verspaetungen))
verspaetungen
   5   10   15   20   25   30 
0.25 0.30 0.10 0.20 0.10 0.05 
## Analyse von mtcars

# Häufigkeiten der Anzahl Zylinder
table(mtcars$cyl)

 4  6  8 
11  7 14 
prop.table(table(mtcars$cyl))

      4       6       8 
0.34375 0.21875 0.43750 
plot(table(mtcars$cyl), type = "h",
     xlab = "Anzahl Zylinder", 
     ylab = "absolute Häufigkeit")

# Häufigkeiten der Beschleunigung (Zeit für Viertelmeile)
table(mtcars$qsec)

 14.5  14.6 15.41  15.5 15.84 16.46  16.7 16.87  16.9 17.02 17.05  17.3  17.4 
    1     1     1     1     1     1     1     1     1     2     1     1     1 
17.42  17.6 17.82 17.98    18  18.3 18.52  18.6 18.61  18.9 19.44 19.47  19.9 
    1     1     1     1     1     1     1     1     1     2     1     1     1 
   20 20.01 20.22  22.9 
    1     1     1     1 
plot(table(mtcars$qsec), type = "h",
     xlab = "Anzahl Zylinder", 
     ylab = "absolute Häufigkeit")

# Stabiagramm auf Basis absoluter Häufigkeiten
plot(table(verspaetungen), type = "h",
     xlab = "Verspätungen",
     ylab = "absolute Häufigkeiten")

## Histogramm

# Histogramm von qsec

hist(mtcars$qsec, freq = FALSE,
     breaks = c(14,16,17,18,19,20,23),
     right = FALSE)

## Empirische Verteilungsfunktion

plot(ecdf(mtcars$qsec))

## Diamantendatensatz

install.packages("ggplot2")
# Downloading packages -------------------------------------------------------
- Downloading ggplot2 from CRAN ...             OK [8.1 Mb in 5.1s]
- Downloading gtable from CRAN ...              OK [219.8 Kb in 0.38s]
- Downloading isoband from CRAN ...             OK [1.8 Mb in 1.7s]
- Downloading S7 from CRAN ...                  OK [335.2 Kb in 0.44s]
Successfully downloaded 4 packages in 13 seconds.

The following package(s) will be installed:
- ggplot2 [4.0.1]
- gtable  [0.3.6]
- isoband [0.2.7]
- S7      [0.2.1]
These packages will be installed into "~/RProjects/fhweb/fhweb/renv/library/macos/R-4.5/aarch64-apple-darwin20".

# Installing packages --------------------------------------------------------
- Installing gtable ...                         OK [installed binary and cached in 0.37s]
- Installing isoband ...                        OK [installed binary and cached in 0.14s]
- Installing S7 ...                             OK [installed binary and cached in 0.14s]
- Installing ggplot2 ...                        OK [installed binary and cached in 0.27s]
Successfully installed 4 packages in 1 second.
library("ggplot2")


# Häufigkeiten des Gewichts
table(diamonds$carat) # -> nicht besonders hilfreich

 0.2 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29  0.3 0.31 0.32 0.33 0.34 0.35 
  12    9    5  293  254  212  253  233  198  130 2604 2249 1840 1189  910  667 
0.36 0.37 0.38 0.39  0.4 0.41 0.42 0.43 0.44 0.45 0.46 0.47 0.48 0.49  0.5 0.51 
 572  394  670  398 1299 1382  706  488  212  110  178   99   63   45 1258 1127 
0.52 0.53 0.54 0.55 0.56 0.57 0.58 0.59  0.6 0.61 0.62 0.63 0.64 0.65 0.66 0.67 
 817  709  625  496  492  430  310  282  228  204  135  102   80   65   48   48 
0.68 0.69  0.7 0.71 0.72 0.73 0.74 0.75 0.76 0.77 0.78 0.79  0.8 0.81 0.82 0.83 
  25   26 1981 1294  764  492  322  249  251  251  187  155  284  200  140  131 
0.84 0.85 0.86 0.87 0.88 0.89  0.9 0.91 0.92 0.93 0.94 0.95 0.96 0.97 0.98 0.99 
  64   62   34   31   23   21 1485  570  226  142   59   65  103   59   31   23 
   1 1.01 1.02 1.03 1.04 1.05 1.06 1.07 1.08 1.09  1.1 1.11 1.12 1.13 1.14 1.15 
1558 2242  883  523  475  361  373  342  246  287  278  308  251  246  207  149 
1.16 1.17 1.18 1.19  1.2 1.21 1.22 1.23 1.24 1.25 1.26 1.27 1.28 1.29  1.3 1.31 
 172  110  123  126  645  473  300  279  236  187  146  134  106  101  122  133 
1.32 1.33 1.34 1.35 1.36 1.37 1.38 1.39  1.4 1.41 1.42 1.43 1.44 1.45 1.46 1.47 
  89   87   68   77   50   46   26   36   50   40   25   19   18   15   18   21 
1.48 1.49  1.5 1.51 1.52 1.53 1.54 1.55 1.56 1.57 1.58 1.59  1.6 1.61 1.62 1.63 
   7   11  793  807  381  220  174  124  109  106   89   89   95   64   61   50 
1.64 1.65 1.66 1.67 1.68 1.69  1.7 1.71 1.72 1.73 1.74 1.75 1.76 1.77 1.78 1.79 
  43   32   30   25   19   24  215  119   57   52   40   50   28   17   12   15 
 1.8 1.81 1.82 1.83 1.84 1.85 1.86 1.87 1.88 1.89  1.9 1.91 1.92 1.93 1.94 1.95 
  21    9   13   18    4    3    9    7    4    4    7   12    2    6    3    3 
1.96 1.97 1.98 1.99    2 2.01 2.02 2.03 2.04 2.05 2.06 2.07 2.08 2.09  2.1 2.11 
   4    4    5    3  265  440  177  122   86   67   60   50   41   45   52   43 
2.12 2.13 2.14 2.15 2.16 2.17 2.18 2.19  2.2 2.21 2.22 2.23 2.24 2.25 2.26 2.27 
  25   21   48   22   25   18   31   22   32   23   27   13   16   18   15   12 
2.28 2.29  2.3 2.31 2.32 2.33 2.34 2.35 2.36 2.37 2.38 2.39  2.4 2.41 2.42 2.43 
  20   17   21   13   16    9    5    7    8    6    8    7   13    5    8    6 
2.44 2.45 2.46 2.47 2.48 2.49  2.5 2.51 2.52 2.53 2.54 2.55 2.56 2.57 2.58 2.59 
   4    4    3    3    9    3   17   17    9    8    9    3    3    3    3    1 
 2.6 2.61 2.63 2.64 2.65 2.66 2.67 2.68  2.7 2.71 2.72 2.74 2.75 2.77  2.8    3 
   3    3    3    1    1    3    1    2    1    1    3    3    2    1    2    8 
3.01 3.02 3.04 3.05 3.11 3.22 3.24  3.4  3.5 3.51 3.65 3.67    4 4.01 4.13  4.5 
  14    1    2    1    1    1    1    1    1    1    1    1    1    2    1    1 
5.01 
   1 
# Histogramm des Gewichts
hist(diamonds$carat, breaks = c(0,0.25,0.5,0.75,1,1.5,2,2.5,5.5))

# empirische Verteilungsfunktion des Gewichts
plot(ecdf(diamonds$carat))

# Histogramm der Preise
hist(diamonds$price,
     breaks = c(0,1000,2000,3000,4000,5000,7500,10000,20000))

## Arithmetisches Mittel und Median

mean(diamonds$price)
[1] 3932.8
median(diamonds$price)
[1] 2401
mean(diamonds$carat)
[1] 0.7979397
median(diamonds$carat)
[1] 0.7
## Quantile

quantile(diamonds$price, probs = c(0,0.25,0.5,0.75,1))
      0%      25%      50%      75%     100% 
  326.00   950.00  2401.00  5324.25 18823.00 
summary(diamonds$price)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    326     950    2401    3933    5324   18823 
## Boxplot

boxplot(diamonds$price, horizontal = TRUE)

hist(diamonds$price)

boxplot(diamonds$price ~ diamonds$color)

Vorlesung vom 28./29.11.2025

# Übungsaufgabe 2.5

bz <- c(40, 20, 22, 15, 18, 51, 37, 42, 31, 58, 
        33, 39, 49, 22, 23, 62, 42, 53, 43, 44, 
        19, 49, 39, 36, 37, 38, 22, 24, 32, 29, 
        41, 40, 39, 38, 27, 51, 52, 54, 28, 22, 
        64, 19, 50, 40, 18, 68, 51, 41, 48, 57)

hist(bz, breaks = c(0,20,30,40,50,70), 
     right = FALSE)

# Übungsaufgabe 4.1

nkm <- c(300,250,400,500,250,600,300,300,450,400)


## Mittlere absolute Abweichung

mean(abs(nkm-median(nkm)))
[1] 95
## Median der absoluten Abweichung

median(abs(nkm-median(nkm)))
[1] 75
mad(nkm, constant = 1)
[1] 75
sum((nkm-mean(nkm))^2)
[1] 121250
var(nkm)
[1] 13472.22
## Aufgabe 4.7

bz <- c(40, 20, 22, 15, 18, 51, 37, 42, 31, 58, 
        33, 39, 49, 22, 23, 62, 42, 53, 43, 44, 
        19, 49, 39, 36, 37, 38, 22, 24, 32, 29, 
        41, 40, 39, 38, 27, 51, 52, 54, 28, 22, 
        64, 19, 50, 40, 18, 68, 51, 41, 48, 57)

var(bz) * (length(bz)-1)/length(bz)
[1] 178.8244

Vorlesung vom 12.12.2025

## Aufgabe 5.1

daten <- data.frame(forschung  = c(15,15,20,25,15,20,30,50,35,30,25,20),
                    werbung    = c(40,25,20,20,25,20,15,10,10,20,20,15))


n <- nrow(daten)

### Kovarianz
cov(daten$forschung, 
    daten$werbung) * (n-1)/n
[1] -54.16667
### Korrelationskoeffizient nach Bravais-Pearson
cor(daten$forschung, 
    daten$werbung)
[1] -0.7244617
### Visualisierung
plot(x = daten$forschung, 
     y = daten$werbung,
     xlab = "Ausgabaen für Forschung",
     ylab = "Ausgaben für Werbung")
abline(h = mean(daten$werbung), lty = 2, col = "red")
abline(v = mean(daten$forschung), lty = 2, col = "red")

# Diamantendatensatz
library(ggplot2)

cor(diamonds$carat, diamonds$price)
[1] 0.9215913
cor(diamonds$carat, diamonds$table)
[1] 0.1816175
plot(diamonds$carat, diamonds$price)

# Berechnung Korrelationsmatrix

install.packages("MSBStatsData")
The following package(s) will be installed:
- MSBStatsData [0.0.2]
These packages will be installed into "~/RProjects/fhweb/fhweb/renv/library/macos/R-4.5/aarch64-apple-darwin20".

# Installing packages --------------------------------------------------------
- Installing MSBStatsData ...                   OK [linked from cache]
Successfully installed 1 package in 4.7 milliseconds.
library(MSBStatsData)
?decathlon

round(cor(decathlon), 2)
                race100m longjump shotput highjump race400m race110mhurdles
race100m            1.00    -0.48   -0.15    -0.12     0.57            0.45
longjump           -0.48     1.00    0.25     0.36    -0.31           -0.38
shotput            -0.15     0.25    1.00     0.16    -0.03           -0.25
highjump           -0.12     0.36    0.16     1.00    -0.11           -0.25
race400m            0.57    -0.31   -0.03    -0.11     1.00            0.38
race110mhurdles     0.45    -0.38   -0.25    -0.25     0.38            1.00
discus             -0.12     0.20    0.72     0.14    -0.04           -0.22
polevault          -0.17     0.27    0.25     0.19    -0.13           -0.29
javelinthrow       -0.06     0.17    0.44     0.07    -0.02           -0.13
race1500m          -0.09     0.02    0.11    -0.01     0.38            0.01
                discus polevault javelinthrow race1500m
race100m         -0.12     -0.17        -0.06     -0.09
longjump          0.20      0.27         0.17      0.02
shotput           0.72      0.25         0.44      0.11
highjump          0.14      0.19         0.07     -0.01
race400m         -0.04     -0.13        -0.02      0.38
race110mhurdles  -0.22     -0.29        -0.13      0.01
discus            1.00      0.27         0.42      0.08
polevault         0.27      1.00         0.19     -0.01
javelinthrow      0.42      0.19         1.00      0.02
race1500m         0.08     -0.01         0.02      1.00
plot(decathlon$discus, decathlon$shotput)

plot(decathlon$race100m, decathlon$longjump)

## Rangkorrelation

diamonds
# A tibble: 53,940 × 10
   carat cut       color clarity depth table price     x     y     z
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39
# ℹ 53,930 more rows
cor(rank(diamonds$cut, ties.method = "average"),
    rank(diamonds$price, ties.method = "average"),
    method = "spearman")
[1] -0.09297484
cor(rank(diamonds$color, ties.method = "average"),
    rank(diamonds$price, ties.method = "average"),
    method = "spearman")
[1] 0.1501422
cor(rank(diamonds$clarity, ties.method = "average"),
    rank(diamonds$price, ties.method = "average"),
    method = "spearman")
[1] -0.2115275
cor(diamonds$carat, diamonds$price,
    method = "spearman")
[1] 0.9628828
cor(diamonds$carat, diamonds$price,
    method = "kendall")
[1] 0.8341049
# Aufgabe 5.6

werte <- c(10,10,60,80,
           10,20,20,20,
           30,20,10,80,
           40,70,10,10)
kontingenztafel <- matrix(werte, nrow = 4, byrow = TRUE)


erg <- chisq.test(kontingenztafel)

n <- sum(kontingenztafel)
k <- min(nrow(kontingenztafel),
         nrow(kontingenztafel))


sqrt(erg$statistic/(n*(k-1)))
X-squared 
0.3669547 
## diamonds

kontingenztafel <- table(diamonds$cut, diamonds$color)

chisq.test(kontingenztafel)

    Pearson's Chi-squared test

data:  kontingenztafel
X-squared = 310.32, df = 24, p-value < 2.2e-16
n <- sum(kontingenztafel)
k <- min(nrow(kontingenztafel),
         nrow(kontingenztafel))


sqrt(erg$statistic/(n*(k-1)))
 X-squared 
0.03059656 

Vorlesung vom 19.12.2025

# Übungsaufgabe 2.5

bz <- c(40, 20, 22, 15, 18, 51, 37, 42, 31, 58, 
        33, 39, 49, 22, 23, 62, 42, 53, 43, 44, 
        19, 49, 39, 36, 37, 38, 22, 24, 32, 29, 
        41, 40, 39, 38, 27, 51, 52, 54, 28, 22, 
        64, 19, 50, 40, 18, 68, 51, 41, 48, 57)

hist(bz, breaks = c(0,20,30,40,50,70), 
     right = FALSE)

# Übungsaufgabe 4.1

nkm <- c(300,250,400,500,250,600,300,300,450,400)


## Mittlere absolute Abweichung

mean(abs(nkm-median(nkm)))
[1] 95
## Median der absoluten Abweichung

median(abs(nkm-median(nkm)))
[1] 75
mad(nkm, constant = 1)
[1] 75
sum((nkm-mean(nkm))^2)
[1] 121250
var(nkm)
[1] 13472.22
## Aufgabe 4.7

bz <- c(40, 20, 22, 15, 18, 51, 37, 42, 31, 58, 
        33, 39, 49, 22, 23, 62, 42, 53, 43, 44, 
        19, 49, 39, 36, 37, 38, 22, 24, 32, 29, 
        41, 40, 39, 38, 27, 51, 52, 54, 28, 22, 
        64, 19, 50, 40, 18, 68, 51, 41, 48, 57)

var(bz) * (length(bz)-1)/length(bz)
[1] 178.8244
# Simulation Würfelwurf

w <- sample(x = c(1,2,3,4,5,6), size = 1000000, replace = TRUE)
table(w)/1000000
w
       1        2        3        4        5        6 
0.166729 0.166580 0.166529 0.167020 0.166139 0.167003 
## Gleichverteilung


dunif(-0.5, min = 0, max = 1)
[1] 0
punif(0.5, min = 0, max = 1)
[1] 0.5
x <- runif(1000000, min = 0, max = 5)
hist(x)

Vorlesung vom 09.01.2026

# Aufgabe 8.7

# (a)
pbinom(2, 4, 0.1)
[1] 0.9963
dbinom(0, 4, 0.1) + dbinom(1, 4, 0.1) +dbinom(2, 4, 0.1)
[1] 0.9963
# (b)
dbinom(1,2,0.4)
[1] 0.48
dbinom(2,4,0.4)
[1] 0.3456
# Aufgabe 8.9

# (a)

1-pnorm(5, mean = 0.5, sd = 2)
[1] 0.01222447
1-pnorm(2.25)
[1] 0.01222447
# (b)
pnorm(4,0.5,23)-pnorm(-4,0.5,23)
[1] 0.1380339
# Aufgabe zur Riesterrente

# (b)

# exakt mit Binomialverteilung:
1-pbinom(100,900,0.1)
[1] 0.122629
# näherungsweise mit ZGWS/Standardnormalverteilung
1-pnorm(10/9)
[1] 0.1332603
Zurück nach oben