Code aus der Vorlesung

Vorlesung vom 26.06.2026

# Stetige Gleichverteilung X~GV(a,b)

## Beispiel Zugverspätung, Annahme:
## Keine Verspätungen <0 oder >60 möglich, dazwischen alles
## gleich wahrscheinlich

### Wahrscheinlichkeit für höchstens 20 Minuten Verspätung
### P(X <= 20) = F(20)

punif(20, min = 0, max = 60)

[1] 0.3333333

### Wahrscheinlichkeit für mindestens 20 Minuten Verspätung
### P(X >= 20) = 1 - P(X < 20) = 1 - P(X <= 20), da bei stetigem X gilt: P(X = 20) = 0

1-punif(20, min = 0, max = 60)

[1] 0.6666667

### Wahrscheinlichkeit für 20 bis 40 Minuten Verspätung
### P(20 <= X <= 40) = P(X <= 40) - P(X <= 20)
### = F(40) - F(20)

punif(40, 0, 60) - punif(20, 0, 60)

[1] 0.3333333

### Wahrscheinlichkeit für genau 20 Minuten Verspätung
### P(X = 20) = 0; das ist nicht dasselbe wie der Wert der Dichte f(20) = 1/60
dunif(20, 0, 60)

[1] 0.01666667

### Zufallszahlen erzeugen

x <- runif(100000, min = 0, max = 60)
hist(x, xlim = c(-10,70), freq = FALSE, breaks = 100)

y <- rnorm(10000)
hist(y, xlim = c(-5,5), freq = FALSE)

# Binomialverteilung

## Wahrscheinlichkeit für genau 2 Mädchen unter 4 Kindern
## P(X = 2)

dbinom(2, size = 4, prob = 0.49)

[1] 0.3747001

## Wahrscheinlichkeit für höchstens 3 Mädchen
## P(X <= 3) = F(3)

pbinom(3, size = 4, prob = 0.49)

[1] 0.942352

## P(X <= 3) = P(X = 0) + P(X = 1) + P(X = 2) + P(X = 3)
##           = f(0) + f(1) + f(2) + f(3)

sum(dbinom(0:3, size = 4, prob = 0.49))

[1] 0.942352

# Aufgabe 8.14

# (a)
# zweimotorige Maschine: P(X<=1) = F(1)

pbinom(1,2,0.1)

[1] 0.99

pbinom(1,2,0.4)

[1] 0.84

dbinom(0:2, 2, 0.1)

[1] 0.81 0.18 0.01

# viermotorige Maschine: P(X<=2) = F(2)

pbinom(2,4,0.1)

[1] 0.9963

pbinom(2,4,0.4)

[1] 0.8208

Vorlesung vom 29.05.2026

# Lineare Regression

library(ggplot2)

lr_diamonds <- lm(price ~ carat, data = diamonds)
lr_diamonds


Call:
lm(formula = price ~ carat, data = diamonds)

Coefficients:
(Intercept)        carat  
      -2256         7756

plot(x = diamonds$carat,
     y = diamonds$price,
     xlab = "Gewicht in Karat",
     ylab = "Preis in USD")
abline(lr_diamonds, col = "red")

summary(lr_diamonds)


Call:
lm(formula = price ~ carat, data = diamonds)

Residuals:
     Min       1Q   Median       3Q      Max 
-18585.3   -804.8    -18.9    537.4  12731.7 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -2256.36      13.06  -172.8   <2e-16 ***
carat        7756.43      14.07   551.4   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1549 on 53938 degrees of freedom
Multiple R-squared:  0.8493,    Adjusted R-squared:  0.8493 
F-statistic: 3.041e+05 on 1 and 53938 DF,  p-value: < 2.2e-16

plot(lr_diamonds, which = 4)

# Im einfachen linearen Regressionsmodell (mit y-Achsenabschnitt)
# ist das Bestimmtheitsmaß gleich der quadrierten Korrelation (nach Bravais-Pearson)
cor(diamonds$carat, diamonds$price)^2

[1] 0.8493305

# Modelldiagnostik

## Cook's distance

plot(anscombe[,c(3,7)])
ans3 <- lm(y3 ~ x3, data = anscombe)
abline(ans3, col = "red")

plot(ans3, which = 4)

# Residuals vs. fitted
plot(anscombe[,c(2,6)])
ans2 <- lm(y2 ~ x2, data = anscombe)
abline(ans2, col = "red")

plot(ans2, which = 1)

# Multiple lineare Regression mit nominal skalierten Merkmalen
diamonds$cut <- factor(diamonds$cut, ordered = FALSE)
diamonds$clarity <- factor(diamonds$clarity, ordered = FALSE)
diamonds$color <- factor(diamonds$color, ordered = FALSE)

lr_diamonds <- lm(price ~ carat + cut + clarity + color, data = diamonds)
summary(lr_diamonds)


Call:
lm(formula = price ~ carat + cut + clarity + color, data = diamonds)

Residuals:
     Min       1Q   Median       3Q      Max 
-16813.5   -680.4   -197.6    466.4  10394.9 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -7362.80      51.68 -142.46   <2e-16 ***
carat         8886.13      12.03  738.44   <2e-16 ***
cutGood        655.77      33.63   19.50   <2e-16 ***
cutVery Good   848.72      31.28   27.14   <2e-16 ***
cutPremium     869.40      30.93   28.11   <2e-16 ***
cutIdeal       998.25      30.66   32.56   <2e-16 ***
claritySI2    2625.95      44.79   58.63   <2e-16 ***
claritySI1    3573.69      44.60   80.13   <2e-16 ***
clarityVS2    4217.83      44.84   94.06   <2e-16 ***
clarityVS1    4534.88      45.54   99.59   <2e-16 ***
clarityVVS2   4967.20      46.89  105.93   <2e-16 ***
clarityVVS1   5072.03      48.21  105.20   <2e-16 ***
clarityIF     5419.65      52.14  103.95   <2e-16 ***
colorE        -211.68      18.32  -11.56   <2e-16 ***
colorF        -303.31      18.51  -16.39   <2e-16 ***
colorG        -506.20      18.12  -27.93   <2e-16 ***
colorH        -978.70      19.27  -50.78   <2e-16 ***
colorI       -1440.30      21.65  -66.54   <2e-16 ***
colorJ       -2325.22      26.72  -87.01   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1157 on 53921 degrees of freedom
Multiple R-squared:  0.9159,    Adjusted R-squared:  0.9159 
F-statistic: 3.264e+04 on 18 and 53921 DF,  p-value: < 2.2e-16

library(car)

Loading required package: carData

vif(lr_diamonds)

            GVIF Df GVIF^(1/(2*Df))
carat   1.311363  1        1.145148
cut     1.105469  4        1.012613
clarity 1.296821  7        1.018739
color   1.164850  6        1.012797

Vorlesung vom 08.05.2026

# Berechnung von Kovarianz und Korrelation

## Erzeugen des Datensatzes
df_icecream <- data.frame(
  Temperatur =  c(10,  15,  20,  15,  25,
                  25,  30,  30,  40,  40),
  Eismenge =    c(2000,2000,6000,4000,6000,
                  4000,4000,7000,7000,8000)
)

## korrigierte Stichprobenkovarianz
cov(df_icecream$Temperatur,
    df_icecream$Eismenge)

[1] 17777.78

## gewöhnliche Stichprobenkovarianz
n <- nrow(df_icecream)
(n-1)/n * cov(df_icecream$Temperatur,
              df_icecream$Eismenge)

[1] 16000

## Berechnung der Korrelation
cor(df_icecream$Temperatur,
    df_icecream$Eismenge)

[1] 0.8207827

## Beispiel: Zehnkampfdaten
library(MSBStatsData)

cor(decathlon$race100m,
    decathlon$longjump)

[1] -0.4838989

plot(x = decathlon$race100m,
     y = decathlon$longjump,
     xlab = "100m-Lauf (in sek)",
     ylab = "Weitsprung (in m)")

Vorlesung vom 24.04.2026

# ggf. einmalig installieren: 
install.packages("remotes")

The following package(s) will be installed:
- remotes [2.5.0]
These packages will be installed into "~/RProjects/fhweb/fhweb/renv/library/macos/R-4.5/aarch64-apple-darwin20".

# Installing packages --------------------------------------------------------
- Installing remotes 2.5.0 ...                  OK [linked from cache]
Successfully installed 1 package in 3.4 milliseconds.

remotes::install_github("mchlbckr/MSBStatsData")

Using GitHub PAT from the git credential store.

Skipping install of 'MSBStatsData' from a github remote, the SHA1 (97a5cfc0) has not changed since last install.
  Use `force = TRUE` to force installation

library(MSBStatsData) 
cinema_visitors

# A tibble: 11 × 2
   viewers  days
     <dbl> <dbl>
 1      41     1
 2      42     9
 3      43    13
 4      44    13
 5      45    20
 6      46    15
 7      47    10
 8      48     7
 9      49     5
10      50     4
11      51     3

company_financials

# A tibble: 4 × 5
  company      employees annual_revenue_mio_eur equity_share_pct credit_rating
  <chr>            <dbl>                  <dbl>            <dbl> <chr>        
1 Grunwol GmbH        26                   3.56             21.7 ausgezeichnet
2 Technik AG          17                  25.9              11.9 sehr gut     
3 Kimonade UG          3                   0.08             70   ausreichend  
4 Tiefbau GmbH       115                  15.1              25.4 gut

# Lagemaße

# install.packages("ggplot2") 
library(ggplot2)

mean(diamonds$price)

[1] 3932.8

hist(diamonds$price)

median(diamonds$price)

[1] 2401

boxplot(diamonds$price, horizontal = TRUE)

mean(diamonds$carat)

[1] 0.7979397

median(diamonds$carat)

[1] 0.7

boxplot(diamonds$carat, horizontal = TRUE)

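## Modus (der Name im Ergebnis, hier 0.3, ist der Modus; der Wert 11 ist nur die Position in der Häufigkeitstabelle)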
which.max(table(diamonds$carat))

0.3 
 11

# Aufgabe 4.1

## Varianz
var(cold_rents$monthly_rent_eur)

[1] 13472.22

n <- length(cold_rents$monthly_rent_eur)
var(cold_rents$monthly_rent_eur)*(n-1)/n

[1] 12125

Vorlesung vom 17.04.2026

1+1

[1] 2

log(x = 8, base = 2)

[1] 3

log(8, 2)

[1] 3

log(8)

[1] 2.079442

alter <- 33
name <- "Peter"

name + 1

Error in `name + 1`:
! non-numeric argument to binary operator

alter + 1

[1] 34

## Erstellen eines Vektors mit den Verspätungen
verspaetungen <- c(10, 20, 5, 10, 30, 
                   25, 5, 5, 10, 20, 
                   15, 10, 5, 20, 15, 
                   10, 5, 20, 25, 10)


table(verspaetungen)

verspaetungen
 5 10 15 20 25 30 
 5  6  2  4  2  1

prop.table(table(verspaetungen))

verspaetungen
   5   10   15   20   25   30 
0.25 0.30 0.10 0.20 0.10 0.05

# Beschleunigung der Fahrzeuge

## Häufigkeiten
table(mtcars$qsec) # das hier ist die Häufigkeitstabelle


 14.5  14.6 15.41  15.5 15.84 16.46  16.7 16.87  16.9 17.02 17.05  17.3  17.4 
    1     1     1     1     1     1     1     1     1     2     1     1     1 
17.42  17.6 17.82 17.98    18  18.3 18.52  18.6 18.61  18.9 19.44 19.47  19.9 
    1     1     1     1     1     1     1     1     1     2     1     1     1 
   20 20.01 20.22  22.9 
    1     1     1     1

## Stabdiagramm

plot(table(mtcars$qsec), type = "h",
     xlab = "Beschleunigung (Sekunden pro Viertelmeile)",
     ylab = "absolute Häufigkeiten")

## Balkendiagramm

barplot(table(mtcars$qsec),
     xlab = "Beschleunigung (Sekunden pro Viertelmeile)",
     ylab = "absolute Häufigkeiten")

## Histogramm
hist(mtcars$qsec, freq = FALSE)

hist(mtcars$qsec, breaks = 4)

hist(mtcars$qsec, 
     breaks = c(14,15,16,17,18,19,20,24),
     right = FALSE)

## empirische Verteilungsfunktion
plot(ecdf(mtcars$qsec))

library(ggplot2)
hist(diamonds$carat)

plot(ecdf(diamonds$carat))

hist(diamonds$price)

plot(ecdf(diamonds$price))

Zurück nach oben