Resumen

row

Cantidad de aves en Colombia

1,954

Cantidad especies

238 (12.2%)

Column

Aves observadas segun el tiempo en minutos

Column

Observaciones según la hora de inicio

cantidad de especies por año

Gráficas(Especies por año y meses)

Column

Observaciones por fecha

Observaciones por mes

Observaciones por mes

Ordenando la agrupación en clústeres de k-means

Column

Grupos óptimos

Agrupa los datos usando 4 grupos

# A tibble: 238 x 2
   Scientific_Name            .cluster
   <chr>                      <fct>   
 1 Accipiter striatus         1       
 2 Acropternis orthonyx       1       
 3 Aglaeactis cupripennis     1       
 4 Amazilia franciae          3       
 5 Amazilia saucerottei       3       
 6 Amazilia tzacatl           1       
 7 Amazona mercenarius        1       
 8 Ampelion rubrocristatus    1       
 9 Anabacerthia striaticollis 1       
10 Andigena hypoglauca        1       
# … with 228 more rows

Agrupacion de Cluster

Column

Observaciones por Grupo

Observaciones por Grupo

Informacion

Datos Los datos fueron descargados de la plataforma eBird y corresponden a mis avistamientos de aves desde 2018 en el Eje Cafetero.

Referencias

Referencias

---
title: "Aves_eje_Cafetero"
author: "Sandra Rairán"
output: 
  flexdashboard::flex_dashboard:
    orientation: row
    social: menu
    source_code: embed
    vertical_layout: fill
---



```{r setup, include=FALSE}
library(flexdashboard)
library(dplyr)
library(tidyr)
library(RColorBrewer)
library(tidyverse)
library(lubridate)

# ------------------ Parameters ------------------
# Dashboard colours, written as CSS/SVG colour keywords
# (list: https://www.w3.org/TR/css-color-3/#svg-color).

# Value boxes on the "Resumen" page
colombia_color <- "seagreen"       # "Cantidad de aves en Colombia" box
count_color <- "mediumvioletred"   # "Cantidad especies" box

# Marker colour of the start-time histogram
color <- "slateblue"

# Not referenced by any chunk visible in this document
Durationmin_color <- "teal"

# ------------------ Data ------------------

# Raw observation records, read from the working directory.
data_bird <- read.csv("birds_colombia.csv")

# Hard-coded total shown in the "Cantidad de aves en Colombia" value box and
# used as the denominator of the percentage in the species value box.
aves_colombia <- 1954

# Number of distinct species present in the data
cant_esp <- n_distinct(data_bird$Scientific_Name)

# Distinct species per year (feeds the "cantidad de especies por año" table)
especies_count <- data_bird %>%
  group_by(year) %>%
  summarise(Cantidad_especies = n_distinct(Scientific_Name))

# Parse the Date column (year-month-day order) into a date column
data_bird$observation_date <- ymd(data_bird$Date)

# Narrow view of the records; glimpse() prints its structure and returns it
bird <- glimpse(select(data_bird, Scientific_Name, observation_date, Count))


# Monthly observations per species.
#
# Step 1: total Count per species, year and month (missing counts ignored).
# Step 2: mean of those totals per species and month, rounded to 1 decimal.
df_observaciones <- data_bird %>%
  group_by(Scientific_Name, year, Mes) %>%
  summarize(observation_count = sum(Count, na.rm = TRUE)) %>%
  group_by(Scientific_Name, Mes) %>%
  summarize(observation_count_mean = round(mean(observation_count), 1)) %>%
  ungroup() %>%
  # complete(Scientific_Name, Mes = meses) %>%
  replace_na(list(observation_count_mean = 0))

# The heat-map chunk sorts on columns mes_Jan ... mes_Dec, so Mes is expected
# to hold English month abbreviations. As plain text they sort alphabetically
# (Apr, Aug, Dec, ...), which scrambles the month axis of every plot. Store
# them as a factor in calendar order instead. The guard leaves Mes untouched
# if it contains anything other than those abbreviations.
if (all(df_observaciones$Mes %in% month.abb)) {
  df_observaciones <- df_observaciones %>%
    mutate(Mes = factor(Mes, levels = month.abb))
}

df_observaciones <- arrange(df_observaciones, Scientific_Name, Mes)
glimpse(df_observaciones)

# Work on a log10 scale; log10(0) is -Inf, which is mapped back to 0.
df_observaciones <- df_observaciones %>%
  mutate(
    observation_count_mean_log10 = log10(observation_count_mean),
    observation_count_mean_log10 = case_when(
      is.infinite(observation_count_mean_log10) ~ 0,
      TRUE ~ observation_count_mean_log10
    )
  ) %>%
  select(-observation_count_mean)

# k-means input: one row per species, one "mes_<month>" column per month.
df_observaciones_aves <- df_observaciones %>%
  select(Scientific_Name, Mes, observation_count_mean_log10) %>%
  pivot_wider(
    names_from = Mes,
    values_from = observation_count_mean_log10,
    names_prefix = "mes_"
  )

# Species/month combinations with no records become 0
df_observaciones_aves[is.na(df_observaciones_aves)] <- 0
  
  
library(purrr)
library(tibble)
library(tidyr)
library(broom)
library(dplyr)
library(ggplot2)

# kmeans() chooses its starting centres at random. Fixing the seed makes the
# sweep below, and the elbow plot built from it, identical on every render.
set.seed(123)

# Numeric month columns only; the species name is not a clustering feature.
kmeans_features <- df_observaciones_aves %>%
  select(-Scientific_Name)

# Fit k-means for k = 1..9 and keep, for each fit, the three broom summaries:
#   tidied    - one row per cluster
#   glanced   - one row per model (includes tot.withinss)
#   augmented - the input rows with their assigned cluster
kclusts <- tibble(k = 1:9) %>%
  mutate(
    kclust = map(k, function(n_centers) kmeans(kmeans_features, n_centers)),
    tidied = map(kclust, tidy),
    glanced = map(kclust, glance),
    augmented = map(kclust, augment, kmeans_features)
  )

kclusts

clusters <- kclusts %>%
  unnest(tidied)

assignments <- kclusts %>%
  unnest(augmented)

# Used by the "Grupos óptimos" elbow plot
clusterings <- kclusts %>%
  unnest(glanced)
 
 #Agrupa los datos usando 4 grupos
# Seed the RNG first: kmeans() starts from randomly chosen centres, so without
# a seed the cluster labels change from one render to the next.
set.seed(123)
df_kmeans <- df_observaciones_aves %>%
  select(-Scientific_Name) %>%
  kmeans(centers = 4)

# Attach the assigned cluster to each species and keep only those two columns.
df_clustered <- augment(df_kmeans, df_observaciones_aves) %>%
  select(Scientific_Name, .cluster)
df_clustered
 


```

Resumen
=======================================================================


row {data-width=450}
-----------------------------------------------------------------------

### Cantidad de aves en Colombia {.value-box}

```{r}
# Value box with the hard-coded Colombian total, formatted with a thousands
# separator (the trailing space is part of the displayed value).
total_label <- paste0(format(aves_colombia, big.mark = ","), " ")

valueBox(
  total_label,
  caption = "Cantidad de aves en Colombia",
  color = colombia_color,
  icon = "fas fa-dove"
)
```



### Cantidad especies {.value-box}

```{r}


# Value box with the number of distinct species observed and its share of the
# Colombian total. The share is a percentage, so the ratio is scaled by
# exactly 100 (it was 100.2, which inflated every result slightly).
valueBox(
  value = paste0(
    format(cant_esp, big.mark = ","),
    " (",
    round(100 * cant_esp / aves_colombia, 1),
    "%)"
  ),
  caption = "Total de Especies Observadas en el Eje Cafetero",
  icon = "fas fa-crow",
  color = count_color
)

```


Column {data-width=650}
-----------------------------------------------------------------------

### Aves observadas segun el tiempo en minutos

```{r}
  # Stacked bar chart of Count against observation duration (Duration_Min),
  # with a logarithmic y axis.
  duration_x_axis <- list(title = "Duracion observacion en Minutos")
  count_y_axis <- list(title = "Cantidad de Aves", type = "log")

  plotly::plot_ly(data = data_bird) %>%
    plotly::add_trace(
      x = ~Duration_Min,
      y = ~Count,
      type = "bar",
      name = "Active",
      marker = list(color = "goldenrod")
    ) %>%
    plotly::layout(
      barmode = "stack",
      yaxis = count_y_axis,
      xaxis = duration_x_axis,
      hovermode = "compare",
      # Only bottom/top margins and padding are set; left/right keep defaults
      margin = list(b = 10, t = 10, pad = 2)
    )
```

Column {data-width=350}
-----------------------------------------------------------------------


### Observaciones según la hora de inicio

```{r}
# Histogram of the observation start time (Time column), log-scaled y axis.
# The y axis of a histogram is a frequency, so it is labelled as a count of
# records rather than as a start time.
plotly::plot_ly(data = data_bird) %>%
  plotly::add_trace(
    x = ~Time,
    type = "histogram",
    name = "Active",
    marker = list(color = color)
  ) %>%
  plotly::layout(
    barmode = "stack",
    yaxis = list(title = "Cantidad de observaciones", type = "log"),
    xaxis = list(title = "Hora de inicio (time)"),
    hovermode = "compare",
    # Only bottom/top margins and padding are set; left/right keep defaults
    margin = list(b = 10, t = 10, pad = 2)
  )
     
```

 

### cantidad de especies por año

```{r}

library(reactable)
# Map values in [0, 1] to a hex colour on a light-to-strong orange ramp.
orange_pal <- function(x) {
  ramp <- colorRamp(c("#ffe4cc", "#ff9500"))
  rgb(ramp(x), maxColorValue = 255)
}


# Species-per-year table, sorted by count (descending). Each count cell is
# shaded on the orange ramp according to its position between the smallest
# and largest yearly count.
especies_min <- min(especies_count$Cantidad_especies)
especies_range <- max(especies_count$Cantidad_especies) - especies_min

reactable(
  especies_count,
  defaultSorted = list(Cantidad_especies = "desc"),
  columns = list(
    Cantidad_especies = colDef(style = function(value) {
      # With a single year, or identical counts, the range is 0 and the
      # division would give NaN, which rgb() rejects. Use the lightest shade.
      normalized <- if (especies_range > 0) {
        (value - especies_min) / especies_range
      } else {
        0
      }
      list(background = orange_pal(normalized))
    })
  )
)
```

Gráficas(Especies por año y meses)
=======================================================================
Column {data-width=400}
------------------------------------------------------------------------------
### Observaciones por fecha
```{r}
     
# Line chart (with point markers) of the number of records per observation
# date. The y-axis label previously read "Observacoines".
data_bird %>%
  count(observation_date) %>%
  ggplot(aes(x = observation_date, y = n, group = 1)) +
  geom_line(colour = "mediumvioletred", size = 1, alpha = 0.9, linetype = 1) +
  geom_point(color = "mediumvioletred", size = 2) +
  ggtitle("Observaciones por fecha") +
  labs(
    x = "Fecha Observacion",
    y = "Observaciones"
  ) +
  theme_light()
                         
```

### Observaciones por mes
```{r}
# Values are on a log10 scale so that the positively skewed counts look more
# "normal". Jittered points are drawn first, box plots on top of them.
ggplot(
  data = df_observaciones,
  mapping = aes(x = Mes, y = observation_count_mean_log10)
) +
  geom_jitter(color = "mediumorchid", size = 0.7, alpha = 0.5) +
  geom_boxplot(fill = "palegreen", colour = "mediumseagreen") +
  labs(
    x = "Observaciones por mes",
    y = "Promedio de aves observadas (log10)"
  ) +
  theme_light()
```

### Observaciones por mes
```{r}
# Values are on a log10 scale so that the positively skewed counts look more
# "normal". One point per species and month.
ggplot(
  data = df_observaciones,
  mapping = aes(x = Mes, y = observation_count_mean_log10)
) +
  geom_point(color = "purple", alpha = 0.8) +
  labs(
    x = "Observaciones por mes",
    y = "Promedio de aves observadas (log10)"
  )
```


Ordenando la agrupación en clústeres de k-means
=======================================================================
Column {data-height=400}
-----------------------------------------------------------------------

### Grupos óptimos
```{r}
# Elbow plot: total within-cluster sum of squares for each k tried in the
# sweep, with dashed reference lines at k = 2 and k = 4.
ggplot(data = clusterings, mapping = aes(x = k, y = tot.withinss)) +
  geom_line() +
  geom_vline(xintercept = c(2, 4), linetype = 2) +
  scale_x_continuous(breaks = 1:9) +
  labs(x = "Number of clusters")
```


### Agrupa los datos usando 4 grupos

```{r}
# Show the species-to-cluster table from the 4-cluster k-means fit made in the
# setup chunk. The model is not refitted here: a second kmeans() call would
# start from different random centres and silently replace df_clustered with
# another clustering.
df_clustered
```


Agrupacion de Cluster
=======================================================================

Column {data-width=400}
-----------------------------------------------------------------------

### Observaciones por Grupo

```{r}
 # Heat map of the mean monthly observations (log10) of every species, one
 # facet per k-means cluster.

 # Cluster label joined to the monthly values; computed once and reused.
 df_observaciones_join <- full_join(
   df_clustered,
   df_observaciones,
   by = "Scientific_Name"
 )

 # Species names ordered by cluster and then by their month-by-month values.
 # This order becomes the factor levels below, i.e. the row order of the
 # tiles. (A former `names(...) <- "Scientific_Name"` was dropped: it named
 # only the first element and had no effect on the factor levels.)
 vec_common_name_cluster <- df_observaciones_join %>%
   pivot_wider(
     names_from = Mes,
     values_from = observation_count_mean_log10,
     names_prefix = "mes_"
   ) %>%
   arrange(
     .cluster,
     mes_Jan, mes_Feb, mes_Mar, mes_Apr, mes_May, mes_Jun,
     mes_Jul, mes_Aug, mes_Sep, mes_Oct, mes_Nov, mes_Dec
   ) %>%
   pull(Scientific_Name)

 df_observaciones_clustered <- df_observaciones_join %>%
   mutate(
     Scientific_Name = factor(Scientific_Name, levels = vec_common_name_cluster)
   )

 df_observaciones_clustered %>%
   mutate(.cluster = str_c("Cluster", .cluster, sep = " ")) %>%
   ggplot(aes(Mes, Scientific_Name, fill = observation_count_mean_log10)) +
   geom_tile() +
   facet_wrap(~.cluster, scales = "free_y", ncol = 1) +
   scale_fill_viridis_c("Promedio  observacion (log10)") +
   scale_x_discrete(expand = c(0, 0)) +
   scale_y_discrete(expand = c(0, 0)) +
   labs(
     x = "Observacion mes",
     y = "Especies"
   ) +
   theme(
     panel.grid = element_blank(),
     # Species labels are hidden in this full view
     axis.text.y = element_blank(),
     axis.ticks.y = element_blank(),
     legend.direction = "horizontal",
     legend.position = "bottom",
     legend.justification = "right"
   )

```


### Observaciones por Grupo 

``` {r}
# Random sample of at most 10 species per cluster, without replacement.
# sample_n(10) aborts when a cluster holds fewer than 10 species, which can
# happen with k-means. Here the sample size is capped at the cluster size.
df_cluster_sample <- df_clustered %>%
  group_by(.cluster) %>%
  filter(row_number() %in% sample.int(n(), min(10L, n()))) %>%
  ungroup()

# Same heat map as the full view, restricted to the sampled species and with
# the species names shown on the y axis.
df_observaciones_clustered %>%
  # Join keys spelled out: the two columns the tables have in common
  semi_join(df_cluster_sample, by = c("Scientific_Name", ".cluster")) %>%
  mutate(.cluster = str_c("Cluster", .cluster, sep = " ")) %>%
  ggplot(aes(Mes, Scientific_Name, fill = observation_count_mean_log10)) +
  geom_tile() +
  facet_wrap(~.cluster, scales = "free_y", ncol = 1) +
  scale_fill_viridis_c("Promedio  observacion (log10)") +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(
    x = "Observation month",
    y = NULL
  ) +
  theme(
    panel.grid = element_blank(),
    axis.text.y = element_text(size = 5),
    axis.ticks.y = element_blank(),
    legend.direction = "horizontal",
    legend.position = "top",
    legend.justification = "right"
  )
```



Informacion
=======================================================================


**Datos** 
Los datos fueron descargados de la plataforma eBird y corresponden a mis avistamientos de aves desde 2018 en el Eje Cafetero.

**Referencias**

  [Referencias](https://mran.microsoft.com/snapshot/2018-07-25/web/packages/broom/vignettes/kmeans.html)