# A tibble: 238 x 2
Scientific_Name .cluster
<chr> <fct>
1 Accipiter striatus 1
2 Acropternis orthonyx 1
3 Aglaeactis cupripennis 1
4 Amazilia franciae 3
5 Amazilia saucerottei 3
6 Amazilia tzacatl 1
7 Amazona mercenarius 1
8 Ampelion rubrocristatus 1
9 Anabacerthia striaticollis 1
10 Andigena hypoglauca 1
# … with 228 more rows
Datos Los datos son descargados de la plataforma Ebird sobre mis avistamientos de aves desde el 2018 en el Eje cafetero.
Referencias
---
title: "Aves_eje_Cafetero"
author: "Sandra Rairán"
output:
  flexdashboard::flex_dashboard:
    orientation: row
    social: menu
    source_code: embed
    vertical_layout: fill
---
```{r setup, include=FALSE}
library(flexdashboard)
library(dplyr)
library(tidyr)
library(RColorBrewer)
library(tidyverse)
library(lubridate)
# ---- Parameters ----
# Named CSS/SVG colours shared by the value boxes and charts
# (https://www.w3.org/TR/css-color-3/#svg-color).
colombia_color <- "seagreen"
count_color <- "mediumvioletred"
color <- "slateblue"
Durationmin_color <- "teal"

# ---- Data ----
# Sightings table, read from a CSV expected in the working directory.
data_bird <- read.csv("birds_colombia.csv")
# Hard-coded reference figure shown as "Cantidad de aves en Colombia".
aves_colombia <- 1954
# Number of distinct species in the whole data set.
cant_esp <- n_distinct(data_bird$Scientific_Name)

# Distinct species recorded in each year, one row per year.
especies_count <- data_bird %>%
  group_by(year) %>%
  summarise(Cantidad_especies = n_distinct(Scientific_Name))

# Parse the `Date` column with lubridate::ymd() into `observation_date`.
data_bird <- data_bird %>%
  mutate(observation_date = ymd(Date))

# Three-column view of the data; glimpse() prints its structure and passes
# the data through, so `bird` holds the selected columns.
bird <- data_bird %>%
  select(Scientific_Name, observation_date, Count) %>%
  glimpse()
# Mean monthly count per species, in two aggregation steps:
#   1. total birds counted per species, year and month;
#   2. mean of those totals per species and month, rounded to one decimal.
df_observaciones <- data_bird %>%
  group_by(Scientific_Name, year, Mes) %>%
  summarize(observation_count = sum(Count, na.rm = TRUE)) %>%
  group_by(Scientific_Name, Mes) %>%
  summarize(observation_count_mean = round(mean(observation_count), 1)) %>%
  ungroup() %>%
  # complete(Scientific_Name, Mes = meses) %>%
  replace_na(list(observation_count_mean = 0)) %>%
  arrange(Scientific_Name, Mes)
glimpse(df_observaciones)

# Replace the mean by its log10. A mean of 0 gives -Inf, which is mapped to 0
# so every value stays finite.
df_observaciones <- df_observaciones %>%
  mutate(
    observation_count_mean_log10 = log10(observation_count_mean),
    observation_count_mean_log10 = if_else(
      is.infinite(observation_count_mean_log10),
      0,
      observation_count_mean_log10
    )
  ) %>%
  select(-observation_count_mean)
# Input table for k-means: one row per species and one `mes_<month>` column
# per month holding the log10 mean count.
df_observaciones_aves <- df_observaciones %>%
  select(Scientific_Name, Mes, observation_count_mean_log10) %>%
  pivot_wider(
    names_from = Mes,
    values_from = observation_count_mean_log10,
    names_prefix = "mes_"
  )
# Species/month combinations with no data come out as NA; treat them as 0.
df_observaciones_aves <- replace(
  df_observaciones_aves,
  is.na(df_observaciones_aves),
  0
)
library(purrr)
library(tibble)
library(tidyr)
library(broom)
library(dplyr)
library(ggplot2)
# kmeans() starts from randomly chosen centres; fix the seed so the elbow
# curve is identical on every render.
set.seed(2018)

# Numeric month columns only (species name dropped), built once and reused
# by every fit below.
cluster_features <- df_observaciones_aves %>%
  select(-Scientific_Name)

# Fit k-means for k = 1..9 and keep three broom summaries of each fit:
#   tidied    - per-cluster summary,
#   glanced   - one-row model summary (provides tot.withinss for the elbow plot),
#   augmented - the input rows with their assigned .cluster.
kclusts <- tibble(k = 1:9) %>%
  mutate(
    kclust = map(k, ~ kmeans(cluster_features, .x)),
    tidied = map(kclust, tidy),
    glanced = map(kclust, glance),
    augmented = map(kclust, augment, cluster_features)
  )
kclusts

# Flatten each list column into its own table.
clusters <- kclusts %>%
  unnest(tidied)
assignments <- kclusts %>%
  unnest(augmented)
clusterings <- kclusts %>%
  unnest(glanced)
# Final model: partition the species into 4 clusters.
# kmeans() picks its starting centres at random, so the seed is fixed here to
# keep cluster membership and cluster numbering stable between renders.
set.seed(2018)
df_kmeans <- df_observaciones_aves %>%
  select(-Scientific_Name) %>%
  kmeans(centers = 4)

# Attach the cluster label to each species and keep only name + label.
df_clustered <- augment(df_kmeans, df_observaciones_aves) %>%
  select(Scientific_Name, .cluster)
df_clustered
```
Resumen
=======================================================================
row {data-width=450}
-----------------------------------------------------------------------
### Cantidad de aves en Colombia {.value-box}
```{r}
# Value box with the reference number of bird species in Colombia.
# The displayed text is the formatted number followed by a single space.
valueBox(
  value = paste0(format(aves_colombia, big.mark = ","), " "),
  caption = "Cantidad de aves en Colombia",
  color = colombia_color,
  icon = "fas fa-dove"
)
```
### Cantidad especies {.value-box}
```{r}
# Value box with the number of species observed and the share they represent
# of Colombia's species, e.g. "238 (12.2%)".
# The share is a plain percentage, 100 * observed / total; a multiplier other
# than 100 (the code used 100.2) overstates it.
valueBox(
  value = paste0(
    format(cant_esp, big.mark = ","),
    " (",
    round(100 * cant_esp / aves_colombia, 1),
    "%)"
  ),
  caption = "Total de Especies Observadas en el Eje Cafetero",
  icon = "fas fa-crow",
  color = count_color
)
```
Column {data-width=650}
-----------------------------------------------------------------------
### Aves observadas segun el tiempo en minutos
```{r}
# Bar chart of birds counted (log-scaled y axis) against the duration of the
# observation in minutes.
plotly::plot_ly(data = data_bird) %>%
  plotly::add_trace(
    x = ~Duration_Min,
    y = ~Count,
    type = "bar",
    name = "Active",
    marker = list(color = "goldenrod")
  ) %>%
  plotly::layout(
    barmode = "stack",
    xaxis = list(title = "Duracion observacion en Minutos"),
    yaxis = list(title = "Cantidad de Aves", type = "log"),
    hovermode = "compare",
    # Only bottom/top margins and padding are set; left/right keep defaults.
    margin = list(b = 10, t = 10, pad = 2)
  )
```
Column {data-width=350}
-----------------------------------------------------------------------
### Hora de inicio de las observaciones
```{r}
# Histogram of the `Time` column (x axis titled as the start time), with a
# log-scaled y axis.
plotly::plot_ly(data = data_bird) %>%
  plotly::add_trace(
    x = ~Time,
    type = "histogram",
    name = "Active",
    marker = list(color = color)
  ) %>%
  plotly::layout(
    barmode = "stack",
    xaxis = list(title = "Hora de inicio (time)"),
    yaxis = list(title = "Hora de inicio pajareada", type = "log"),
    hovermode = "compare",
    # Only bottom/top margins and padding are set; left/right keep defaults.
    margin = list(b = 10, t = 10, pad = 2)
  )
```
### cantidad de especies por año
```{r}
library(reactable)

# Map values in [0, 1] to hex colours running from light to strong orange.
orange_pal <- function(x) rgb(colorRamp(c("#ffe4cc", "#ff9500"))(x), maxColorValue = 255)

# Smallest and largest yearly species count, computed once instead of once
# per table cell.
especies_rango <- range(especies_count$Cantidad_especies)
especies_amplitud <- diff(especies_rango)

# Table of species per year, sorted from the richest year down, with the
# count cell shaded according to its position between the min and the max.
reactable(
  especies_count,
  defaultSorted = list(Cantidad_especies = "desc"),
  columns = list(
    Cantidad_especies = colDef(style = function(value) {
      # With a single year, or identical counts in every year, max - min is 0
      # and the division would give NaN, which has no colour. Fall back to
      # the lightest shade in that case.
      normalized <- if (isTRUE(especies_amplitud > 0)) {
        (value - especies_rango[1]) / especies_amplitud
      } else {
        0
      }
      list(background = orange_pal(normalized))
    })
  )
)
```
Gráficas(Especies por año y meses)
=======================================================================
Column {data-width=400}
------------------------------------------------------------------------------
### Observaciones por fecha
```{r}
# Number of records per observation date, drawn as a line with a marker on
# each date. The y-axis label spelling is corrected ("Observaciones").
data_bird %>%
  count(observation_date) %>%
  ggplot(aes(x = observation_date, y = n, group = 1)) +
  geom_line(colour = "mediumvioletred", size = 1, alpha = 0.9, linetype = 1) +
  geom_point(color = "mediumvioletred", size = 2) +
  ggtitle("Observaciones por fecha") +
  labs(
    x = "Fecha Observacion",
    y = "Observaciones"
  ) +
  theme_light()
```
### Observaciones por mes
```{r}
# log10 is used so that the right-skewed counts look more "normal".
# Mes holds month labels as text, which a discrete axis sorts alphabetically
# (Apr, Aug, Dec, ...). Convert it to a factor in calendar order first.
# Assumes English abbreviations (Jan..Dec), as the mes_Jan..mes_Dec columns
# used by the heatmap imply; any other label is kept and placed after them.
df_observaciones %>%
  mutate(Mes = factor(Mes, levels = union(month.abb, as.character(Mes)))) %>%
  ggplot(aes(Mes, observation_count_mean_log10)) +
  geom_jitter(color = "mediumorchid", size = 0.7, alpha = 0.5) +
  geom_boxplot(fill = "palegreen", colour = "mediumseagreen") +
  labs(
    x = "Observaciones por mes",
    y = "Promedio de aves observadas (log10)"
  ) +
  theme_light()
```
### Observaciones por mes
```{r}
# log10 is used so that the right-skewed counts look more "normal".
# Mes holds month labels as text, which a discrete axis sorts alphabetically
# (Apr, Aug, Dec, ...). Convert it to a factor in calendar order first.
# Assumes English abbreviations (Jan..Dec), as the mes_Jan..mes_Dec columns
# used by the heatmap imply; any other label is kept and placed after them.
df_observaciones %>%
  mutate(Mes = factor(Mes, levels = union(month.abb, as.character(Mes)))) %>%
  ggplot(aes(Mes, observation_count_mean_log10)) +
  geom_point(color = "purple", alpha = 0.8) +
  labs(
    x = "Observaciones por mes",
    y = "Promedio de aves observadas (log10)"
  )
```
Ordenando la agrupación en clústeres de k-means
=======================================================================
Column {data-height=400}
-----------------------------------------------------------------------
### Grupos óptimos
```{r}
# Elbow plot: total within-cluster sum of squares for each k that was fitted,
# with dashed vertical guides at k = 2 and k = 4.
ggplot(clusterings, aes(x = k, y = tot.withinss)) +
  geom_line() +
  geom_vline(xintercept = c(2, 4), linetype = 2) +
  scale_x_continuous(breaks = 1:9) +
  labs(x = "Number of clusters")
```
### Agrupa los datos usando 4 grupos
```{r}
# Show the 4-cluster assignment that the setup chunk already computed
# (`df_clustered`: one row per species with its `.cluster` label).
# k-means is deliberately not fitted again here: a second run starts from new
# random centres and can return a different partition and numbering, silently
# replacing the result used by the rest of the dashboard.
df_clustered
```
Agrupacion de Cluster
=======================================================================
Column {data-weight=400}
-----------------------------------------------------------------------
### Observaciones por Grupo
```{r}
# Species joined with their cluster label: one row per species and month.
# The join is computed once and reused for the ordering and for the plot.
df_observaciones_clustered <-
  full_join(df_clustered, df_observaciones, by = "Scientific_Name")

# Row order for the heatmap: species sorted by cluster and then by their
# monthly values from January to December.
vec_common_name_cluster <- df_observaciones_clustered %>%
  pivot_wider(
    names_from = Mes,
    values_from = observation_count_mean_log10,
    names_prefix = "mes_"
  ) %>%
  arrange(
    .cluster,
    mes_Jan, mes_Feb, mes_Mar, mes_Apr, mes_May, mes_Jun,
    mes_Jul, mes_Aug, mes_Sep, mes_Oct, mes_Nov, mes_Dec
  ) %>%
  pull(Scientific_Name)

# Encode that order in the factor levels so ggplot draws the rows in it.
df_observaciones_clustered <- df_observaciones_clustered %>%
  mutate(Scientific_Name = factor(Scientific_Name, levels = vec_common_name_cluster))

# Heatmap of every species, one facet per cluster. Mes is put in calendar
# order for the x axis (text labels would otherwise sort alphabetically);
# labels outside Jan..Dec, if any, are kept after the known ones.
df_observaciones_clustered %>%
  mutate(
    .cluster = str_c("Cluster", .cluster, sep = " "),
    Mes = factor(Mes, levels = union(month.abb, as.character(Mes)))
  ) %>%
  ggplot(aes(Mes, Scientific_Name, fill = observation_count_mean_log10)) +
  geom_tile() +
  facet_wrap(~.cluster, scales = "free_y", ncol = 1) +
  scale_fill_viridis_c("Promedio observacion (log10)") +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(
    x = "Observacion mes",
    y = "Especies"
  ) +
  theme(
    panel.grid = element_blank(),
    # Too many species to label individually.
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.direction = "horizontal",
    legend.position = "bottom",
    legend.justification = "right"
  )
```
### Observaciones por Grupo
```{r}
# Up to 10 randomly chosen species per cluster.
# sample_n(10) stops with an error as soon as one cluster holds fewer than 10
# species, so the draw is capped at the size of each cluster instead.
df_cluster_sample <- df_clustered %>%
  group_by(.cluster) %>%
  filter(row_number() %in% sample.int(n(), min(10L, n()))) %>%
  ungroup()

# Heatmap restricted to the sampled species, one facet per cluster, with
# readable species labels. Join keys are spelled out (they are the two
# columns the tables share). Mes is put in calendar order for the x axis
# (text labels would otherwise sort alphabetically); labels outside Jan..Dec,
# if any, are kept after the known ones.
df_observaciones_clustered %>%
  semi_join(df_cluster_sample, by = c("Scientific_Name", ".cluster")) %>%
  mutate(
    .cluster = str_c("Cluster", .cluster, sep = " "),
    Mes = factor(Mes, levels = union(month.abb, as.character(Mes)))
  ) %>%
  ggplot(aes(Mes, Scientific_Name, fill = observation_count_mean_log10)) +
  geom_tile() +
  facet_wrap(~.cluster, scales = "free_y", ncol = 1) +
  scale_fill_viridis_c("Promedio observacion (log10)") +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(
    x = "Observation month",
    y = NULL
  ) +
  theme(
    panel.grid = element_blank(),
    axis.text.y = element_text(size = 5),
    axis.ticks.y = element_blank(),
    legend.direction = "horizontal",
    legend.position = "top",
    legend.justification = "right"
  )
```
Informacion
=======================================================================
**Datos**
Los datos son descargados de la plataforma Ebird sobre mis avistamientos de aves desde el 2018 en el Eje cafetero.
**Referencias**
[Referencias](https://mran.microsoft.com/snapshot/2018-07-25/web/packages/broom/vignettes/kmeans.html)