library(tidyverse)
── Attaching packages ──────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.2.1     ✔ purrr   0.3.2
✔ tibble  2.1.3     ✔ dplyr   0.8.3
✔ tidyr   1.0.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0
── Conflicts ─────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter()  masks stats::filter()
✖ purrr::is_null() masks testthat::is_null()
✖ dplyr::lag()     masks stats::lag()
✖ dplyr::matches() masks tidyr::matches(), testthat::matches()
library(plotly)

Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout

Generación de datos aleatorios

Para generar datos aleatorios, usamos las funciones:

  • rnorm para generar datos que surgen de una distribución normal
  • rt para generar datos que surgen de una distribución t de Student
  • rchisq para generar datos que surgen de una distribución Chi cuadrado
  • runif para generar datos que surgen de una distribución uniforme

Pero antes, tenemos que fijar la semilla para que los datos sean reproducibles.
# Fix the RNG seed so the random draws are reproducible, then print 15 draws
# from a normal distribution with mean 0 and standard deviation 1.
set.seed(1234)
rnorm(n = 15, mean = 0, sd = 1)
 [1] -1.20706575  0.27742924  1.08444118 -2.34569770  0.42912469  0.50605589 -0.57473996 -0.54663186 -0.56445200
[10] -0.89003783 -0.47719270 -0.99838644 -0.77625389  0.06445882  0.95949406
# Print 15 draws from a Student's t distribution with 1 degree of freedom.
rt(n = 15, df = 1)
 [1] -0.363717710 -1.603466805 -0.388596796 -0.588007490  0.007839245 14.690527710 -1.863488555  0.022667470
 [9] -2.084247299 -0.249237745 -1.311594174 -3.569055208 -2.490838240 -3.848779244 -4.271087169
# Print 15 draws from a chi-squared distribution with 1 degree of freedom.
rchisq(n = 15, df = 1)
 [1] 0.5317744 1.4263809 4.2797098 0.2184660 0.6923773 0.0455256 3.1902100 0.2949942 0.5403827 0.1543732 0.8639196
[12] 0.1417290 1.1386091 0.2966193 0.5110879
# Print 15 draws from a uniform distribution on the interval [0, 1].
runif(n = 15, min = 0, max = 1)
 [1] 0.75911999 0.42403021 0.56088725 0.11613577 0.30302180 0.47880269 0.34483055 0.60071414 0.07608332 0.95599261
[11] 0.02220682 0.84171063 0.63244245 0.31009417 0.74256937

Hagamos un gráfico con ggplot para visualizar la información.

# Draw 15 values from each of four distributions, reshape the table to long
# format (one row per value, tagged with the name of its distribution) and
# draw one violin per distribution, each in its own facet with free scales.
# No seed is set here, so every run produces a different sample.
tibble(
  normal = rnorm(n = 15, mean = 0, sd = 1),
  tstudent = rt(n = 15, df = 1),
  chi = rchisq(n = 15, df = 1),
  uniforme = runif(n = 15, min = 0, max = 1)
) %>%
  # pivot_longer() is the current replacement for the superseded gather().
  pivot_longer(
    cols = everything(),
    names_to = "distribucion",
    values_to = "valor"
  ) %>%
  ggplot(aes(distribucion, valor, fill = distribucion)) +
  geom_violin() +
  facet_wrap(~distribucion, scales = "free")

¿Qué pasa si lo corremos varias veces?

Tests

# Two independent samples of 100 draws each from normal distributions with
# means 10 and 15 and standard deviation 1.
dist1 <- rnorm(100, mean = 10, sd = 1)
dist2 <- rnorm(100, mean = 15, sd = 1)

# Unpaired two-sample t-test assuming equal variances. FALSE is spelled out
# because F is an ordinary variable that can be reassigned.
t.test(dist1, dist2, paired = FALSE, var.equal = TRUE)

    Two Sample t-test

data:  dist1 and dist2
t = -33.391, df = 198, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -5.059234 -4.494982
sample estimates:
mean of x mean of y 
 10.17864  14.95575 
# Same comparison with only 10 draws per sample: normal distributions with
# means 10 and 15 and standard deviation 1.
dist1 <- rnorm(10, mean = 10, sd = 1)
dist2 <- rnorm(10, mean = 15, sd = 1)

# Unpaired two-sample t-test assuming equal variances. FALSE is spelled out
# because F is an ordinary variable that can be reassigned.
t.test(dist1, dist2, paired = FALSE, var.equal = TRUE)

    Two Sample t-test

data:  dist1 and dist2
t = -8.9832, df = 18, p-value = 4.529e-08
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -5.963134 -3.702580
sample estimates:
mean of x mean of y 
 10.05722  14.89008 
# Same comparison with only 5 draws per sample: normal distributions with
# means 10 and 15 and standard deviation 1.
dist1 <- rnorm(5, mean = 10, sd = 1)
dist2 <- rnorm(5, mean = 15, sd = 1)

# Unpaired two-sample t-test assuming equal variances. FALSE is spelled out
# because F is an ordinary variable that can be reassigned.
t.test(dist1, dist2, paired = FALSE, var.equal = TRUE)

    Two Sample t-test

data:  dist1 and dist2
t = -6.8836, df = 8, p-value = 0.0001266
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -5.335167 -2.657580
sample estimates:
mean of x mean of y 
 10.07898  14.07535 
# Two samples of 20 draws each with close means (10 and 11) and different
# standard deviations (2 and 1).
dist1 <- rnorm(20, mean = 10, sd = 2)
dist2 <- rnorm(20, mean = 11, sd = 1)

# Unpaired t-test without the equal-variance assumption (Welch's test).
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
t.test(dist1, dist2, paired = FALSE, var.equal = FALSE)

    Welch Two Sample t-test

data:  dist1 and dist2
t = -3.618, df = 29.93, p-value = 0.00108
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -2.5848637 -0.7194778
sample estimates:
mean of x mean of y 
  9.68726  11.33943 

Descripción estadística de los datos

Volvamos a ver los datos de sueldos de funcionarios

# Read the 2019 officials' salary CSV, located relative to the notebook.
sueldos <- read_csv(file = "../fuentes/sueldo_funcionarios_2019.csv")
Parsed with column specification:
cols(
  cuil = col_character(),
  anio = col_double(),
  mes = col_double(),
  funcionario_apellido = col_character(),
  funcionario_nombre = col_character(),
  repartición = col_character(),
  asignacion_por_cargo_i = col_double(),
  aguinaldo_ii = col_double(),
  `total_salario_bruto_i_+_ii` = col_double(),
  observaciones = col_character()
)

Con el comando summary podemos ver algunos de los principales estadísticos de resumen

# Five-number summary plus mean of the asignacion_por_cargo_i column.
summary(sueldos[["asignacion_por_cargo_i"]])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 197746  210061  226866  225401  231168  249662 

Gráficos estadísticos

A modo de ejemplo, dejamos los comandos para realizar gráficos interactivos con ggplot2 y plotly.


# Mammals data set taken from the openintro package.
mamiferos <- openintro::mammals

# Box plot of LifeSpan, converted to an interactive plotly widget.
(ggplot(mamiferos, aes(y = LifeSpan)) +
  geom_boxplot()) %>%
  ggplotly()
Removed 4 rows containing non-finite values (stat_boxplot).

# Interactive scatter plot of brain weight against body weight; Species is
# mapped to the label aesthetic.
(ggplot(mamiferos, aes(x = BodyWt, y = BrainWt, label = Species)) +
  geom_point()) %>%
  ggplotly()

# Same scatter plot with both variables log-transformed.
(ggplot(mamiferos, aes(x = log(BodyWt), y = log(BrainWt), label = Species)) +
  geom_point()) %>%
  ggplotly()

NA
LS0tCnRpdGxlOiBQcm9iYWJpbGlkYWQgeSBFc3RhZMOtc3RpY2EKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKZGF0ZTogIiIKc3VidGl0bGU6IFByw6FjdGljYSBHdWlhZGEKLS0tCgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkocGxvdGx5KQpgYGAKCgojIyMgR2VuZXJhY2nDs24gZGUgZGF0b3MgYWxlYXRvcmlvcwoKUGFyYSBnZW5lcmFyIGRhdG9zIGFsZWF0b3Jpb3MsIHVzYW1vcyBsYXMgZnVuY2lvbmVzOgoKLSBgcm5vcm1gIHBhcmEgZ2VuZXJhciBkYXRvcyBxdWUgc3VyZ2VuIGRlIHVuYSBkaXN0cmlidWNpw7NuIG5vcm1hbAotIGBydGAgcGFyYSBnZW5lcmFyIGRhdG9zIHF1ZSBzdXJnZW4gZGUgdW5hIGRpc3RyaWJ1Y2nDs24gVC1zdHVkZW50Ci0gYHJjaGlzcWAgcGFyYSBnZW5lcmFyIGRhdG9zIHF1ZSBzdXJnZW4gZGUgdW5hIGRpc3RyaWJ1Y2nDs24gQ2hpIGN1YWRyYWRvCi0gYHJ1bmlmYCBwYXJhIGdlbmVyYXIgZGF0b3MgcXVlIHN1cmdlbiBkZSB1bmEgZGlzdHJpYnVjacOzbiB1bmlmb3JtZQo+IFBlcm8gYW50ZXMsIHRlbmVtb3MgcXVlIGZpamFyIGxhIF9zZW1pbGxhXyBwYXJhIHF1ZSBsb3MgZGF0b3Mgc2VhbiByZXByb2R1Y2libGVzCgpgYGB7cn0Kc2V0LnNlZWQoMTIzNCkKcm5vcm0obiA9IDE1LG1lYW4gPSAwLCBzZCA9IDEgKQpydChuID0gMTUsZGY9MSApCnJjaGlzcShuID0gMTUsZGY9MSkKcnVuaWYoMTUsMCwxKQpgYGAKCgpoYWdhbW9zIHVuIGdncGxvdCBwYXJhIHZpc3VhbGl6YXIgbGEgaW5mbwoKYGBge3J9CnRpYmJsZShub3JtYWwgPSBybm9ybShuID0gMTUsbWVhbiA9IDAsIHNkID0gMSApLAogICAgICAgdHN0dWRlbnQgPSBydChuID0gMTUsZGY9MSApLAogICAgICAgY2hpID0gcmNoaXNxKG4gPSAxNSxkZj0xKSwKICAgICAgIHVuaWZvcm1lID0gcnVuaWYoMTUsMCwxKSkgJT4lCiAgZ2F0aGVyKGRpc3RyaWJ1Y2lvbix2YWxvcikgJT4lIAogIGdncGxvdChhZXMoZGlzdHJpYnVjaW9uLHZhbG9yLGZpbGw9ZGlzdHJpYnVjaW9uKSkrCiAgZ2VvbV92aW9saW4oKSsKICBmYWNldF93cmFwKC5+ZGlzdHJpYnVjaW9uLHNjYWxlcyA9ICdmcmVlJykKYGBgCgo+IFF1w6kgcGFzYSBzaSBsbyBjb3JyZW1vcyB2YXJpYXMgdmVjZXM/CgojIyMgVGVzdHMKCgpgYGB7cn0KZGlzdDEgPC0gcm5vcm0oMTAwLCAxMCxzZCA9IDEpCmRpc3QyIDwtIHJub3JtKDEwMCwgMTUsIHNkID0gMSkKCgp0LnRlc3QoZGlzdDEsZGlzdDIsIHBhaXJlZCA9IEYsdmFyLmVxdWFsID0gVFJVRSkKYGBgCgpgYGB7cn0KZGlzdDEgPC0gcm5vcm0oMTAsIDEwLHNkID0gMSkKZGlzdDIgPC0gcm5vcm0oMTAsIDE1LCBzZCA9IDEpCgoKdC50ZXN0KGRpc3QxLGRpc3QyLCBwYWlyZWQgPSBGLHZhci5lcXVhbCA9IFRSVUUpCmBgYAoKYGBge3J9CmRpc3QxIDwtIHJub3JtKDUsIDEwLHNkID0gMSkKZGlzdDIgPC0gcm5vcm0oNSwgMTUsIHNkID0gMSkKCgp0LnRlc3QoZGlzdDEsZGlz
dDIsIHBhaXJlZCA9IEYsdmFyLmVxdWFsID0gVFJVRSkKYGBgCgpgYGB7cn0KZGlzdDEgPC0gcm5vcm0oMjAsIDEwLHNkID0gMikKZGlzdDIgPC0gcm5vcm0oMjAsIDExLCBzZCA9IDEpCgoKdC50ZXN0KGRpc3QxLGRpc3QyLCBwYWlyZWQgPSBGLHZhci5lcXVhbCA9IEYpCmBgYAoKIyMjIERlc2NyaXBjacOzbiBlc3RhZMOtc3RpY2EgZGUgbG9zIGRhdG9zCgpWb2x2YW1vcyBhIHZlciBsb3MgZGF0b3MgZGUgW3N1ZWxkb3MgZGUgZnVuY2lvbmFyaW9zXShodHRwczovL2RhdGEuYnVlbm9zYWlyZXMuZ29iLmFyL2RhdGFzZXQvc3VlbGRvLWZ1bmNpb25hcmlvcykKCgpgYGB7cn0Kc3VlbGRvcyA8LSByZWFkX2NzdignLi4vZnVlbnRlcy9zdWVsZG9fZnVuY2lvbmFyaW9zXzIwMTkuY3N2JykKYGBgCgpDb24gZWwgY29tYW5kbyBgc3VtbWFyeWAgcG9kZW1vcyB2ZXIgYWxndW5vcyBkZSBsb3MgcHJpbmNpcGFsZXMgZXN0YWTDrXN0aWNvcyBkZSByZXN1bWVuCgpgYGB7cn0Kc3VtbWFyeShzdWVsZG9zJGFzaWduYWNpb25fcG9yX2NhcmdvX2kpCmBgYAoKCiMjIyBHcsOhZmljb3MgZXN0YWTDrXN0aWNvcwoKCkEgbW9kbyBkZSBlamVtcGxvLCBkZWphbW9zIGxvcyBjb21hbmRvcyBkZSBSIGJhc2UgcGFyYSByZWFsaXphciBncsOhZmljb3MuCgpgYGB7cn0KCm1hbWlmZXJvcyA8LSBvcGVuaW50cm86Om1hbW1hbHMKCgpnZ3Bsb3RseShnZ3Bsb3QobWFtaWZlcm9zLCBhZXMoeT1MaWZlU3BhbikpKwogIGdlb21fYm94cGxvdCgpICkKCgoKZ2dwbG90bHkoZ2dwbG90KG1hbWlmZXJvcywgYWVzKEJvZHlXdCwgQnJhaW5XdCxsYWJlbD1TcGVjaWVzKSkrCiAgZ2VvbV9wb2ludCgpICkKCmdncGxvdGx5KGdncGxvdChtYW1pZmVyb3MsIGFlcyhsb2coQm9keVd0KSwgbG9nKEJyYWluV3QpLGxhYmVsPVNwZWNpZXMpKSsKICBnZW9tX3BvaW50KCkgKQoKYGBgCgoKCgoK