Partie 3 : Visualisation des données avec R
- Utilisation des bibliothèques ggplot2 et plotly pour créer des graphiques
- Personnalisation des graphiques : axes, légendes, couleurs, etc.
- Création de graphiques interactifs avec plotly
GGPLOT2
Scatterplot (nuage de points)
library(tidyverse)
library(ggpubr)
# Set the default theme for all following ggplots: black-and-white theme
# with the legend positioned at the top.
theme_set(theme_bw() + theme(legend.position = "top"))
# Scatter plot of wt (y) against mpg (x) with a linear-model smoother and
# the Pearson correlation label placed at x = 20.
p <- ggplot(data = mtcars, mapping = aes(x = mpg, y = wt)) +
  geom_point() +
  geom_smooth(method = "lm") +
  stat_cor(method = "pearson", label.x = 20)
p
Scatterplot (nuage de points) avec zoom contextuel
library(ggforce)
# Petal dimensions coloured by species; facet_zoom() zooms along the x axis
# on the observations for which Species == "versicolor".
ggplot(
  data = iris,
  mapping = aes(x = Petal.Length, y = Petal.Width, colour = Species)
) +
  geom_point() +
  facet_zoom(x = Species == "versicolor")
Scatterplot (nuage de points)
Avec encerclement de points
library("ggalt")
# Subset to encircle: the setosa rows only.
circle.df <- filter(iris, Species == "setosa")

# Points coloured by species, with a dashed outline (linetype 2) drawn
# around the subset.
ggplot(data = iris, mapping = aes(x = Petal.Length, y = Petal.Width)) +
  geom_point(mapping = aes(colour = Species)) +
  geom_encircle(data = circle.df, linetype = 2)
Avec bulles
# Bubble chart: point size is mapped to qsec, points are half-transparent.
ggplot(data = mtcars, mapping = aes(x = mpg, y = wt)) +
  geom_point(mapping = aes(size = qsec), alpha = 0.5) +
  scale_size(range = c(0.5, 12)) # point sizes span 0.5 to 12
Avec densité
# Scatter plot of sepal length vs. sepal width, coloured by species with a
# custom three-colour palette; margin.params styles the marginal plots
# (filled by species, thin black outline).
ggscatterhist(
  data = iris,
  x = "Sepal.Length",
  y = "Sepal.Width",
  color = "Species",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  size = 3,
  alpha = 0.6,
  margin.params = list(fill = "Species", color = "black", size = 0.2)
)
Densité de distributions
# One Sepal.Length density curve per species, using the discrete viridis
# colour scale.
ggplot(data = iris, mapping = aes(x = Sepal.Length, colour = Species)) +
  geom_density() +
  scale_colour_viridis_d()
Avec moyennes sur chaque groupe
# Mean Sepal.Length per species, stored in column grp.mean.
mu <- summarise(group_by(iris, Species), grp.mean = mean(Sepal.Length))
# Density curves per species, plus a dashed vertical line at each group
# mean taken from `mu` (columns Species and grp.mean).
ggplot(data = iris, mapping = aes(x = Sepal.Length, colour = Species)) +
  geom_density() +
  geom_vline(
    data = mu,
    mapping = aes(xintercept = grp.mean, colour = Species),
    linetype = 2
  ) +
  scale_colour_viridis_d()
Histogrammes
# Histogram of Sepal.Length (20 bins) with a dashed line at the overall mean
ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_histogram(bins = 20, colour = "black", fill = "white") +
  geom_vline(mapping = aes(xintercept = mean(Sepal.Length)), linetype = 2)
# Histogram drawn on the density scale with a density curve overlaid and a
# dashed line at the overall mean.
# after_stat(density) maps y to the computed density; it replaces the
# deprecated stat(density) notation.
ggplot(iris, aes(Sepal.Length, after_stat(density))) +
  geom_histogram(bins = 20, fill = "white", color = "black") +
  geom_density() +
  geom_vline(aes(xintercept = mean(Sepal.Length)), linetype = 2)
Couleur par groupe
# Overlapping (position = "identity") semi-transparent histograms, one per
# species, with fill and outline both on the discrete viridis scale.
ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_histogram(
    mapping = aes(colour = Species, fill = Species),
    position = "identity",
    bins = 20,
    alpha = 0.5
  ) +
  scale_fill_viridis_d() +
  scale_colour_viridis_d()
Barplot
# Bar-plot data: move the car names from row names into a "rowname" column,
# keep only the columns needed, and turn cyl into a factor.
df <- mtcars %>%
  rownames_to_column(var = "rowname") %>%
  as.data.frame() %>%
  select(rowname, wt, mpg, cyl) %>%
  mutate(cyl = as.factor(cyl))
df
# One bar per car showing mpg; x-axis labels rotated by 45 degrees.
ggplot(data = df, mapping = aes(x = rowname, y = mpg)) +
  geom_col() +
  rotate_x_text(angle = 45)
Barplot avec ajout de couleurs
# One bar per car showing mpg, filled by number of cylinders; x-axis labels
# rotated by 45 degrees.
# A single filled geom_col() layer is enough: an extra unfilled layer
# underneath would be completely hidden by the filled bars.
ggplot(df, aes(x = rowname, y = mpg)) +
  geom_col(aes(fill = cyl)) +
  rotate_x_text(angle = 45)
Séries longues
# Long-format time-series data: one row per (date, variable) pair, with the
# series name in `variable` and its value in `value`.
# pivot_longer() replaces the superseded gather(); rows come out grouped by
# date rather than stacked series by series, which does not affect a plot
# that groups by `variable`.
df <- economics %>%
  select(date, psavert, uempmed) %>%
  pivot_longer(cols = -date, names_to = "variable", values_to = "value")
head(df, 3)
# One line per series, coloured manually, on the minimal theme.
# Line thickness is set with `linewidth`; using `size` for lines is
# deprecated in recent ggplot2 releases.
ggplot(df, aes(x = date, y = value)) +
  geom_line(aes(color = variable), linewidth = 1) +
  scale_color_manual(values = c("#00AFBB", "#E7B800")) +
  theme_minimal()
PLOTLY
2D Scatterplot
library(plotly)
# Load mtcars and work on it under the name `cars`.
# NOTE(review): `cars` is also the name of a built-in dataset; the name is
# kept here because other snippets refer to it.
data(mtcars)
cars <- mtcars

# Interactive 2D scatter plot: weight vs. mpg, marker colour mapped to hp
# and marker size to qsec. The trace type is stated explicitly so plotly
# does not have to guess it, and columns are referenced with formulas.
p <- plot_ly(
  cars,
  x = ~wt, y = ~mpg,
  type = "scatter", mode = "markers",
  color = ~hp, size = ~qsec
) %>%
  layout(
    xaxis = list(title = "Weight (1000 lbs)"),
    yaxis = list(title = "miles per gallon")
  )
p
3D Scatterplot
# Interactive 3D scatter plot: weight, mpg and horsepower on the three
# axes, marker colour mapped to drat and marker size to qsec.
# The z-axis title no longer carries a stray closing parenthesis.
p <- plot_ly(
  cars,
  x = ~wt, y = ~mpg, z = ~hp,
  type = "scatter3d", mode = "markers",
  color = ~drat, size = ~qsec
) %>%
  layout(
    scene = list(
      xaxis = list(title = "Weight (1000 lbs)"),
      yaxis = list(title = "miles per gallon"),
      zaxis = list(title = "Gross horsepower")
    )
  )
p
Séries longues
# Interactive line chart of unemploy / 400 over time from `economics`.
graph <- plot_ly(economics, x = ~date)
graph <- add_trace(
  graph,
  y = ~unemploy / 400,
  type = "scatter",
  mode = "lines"
)
graph
Boites à moustaches
# mpg by number of cylinders: raw observations as markers plus a box plot
# per cylinder group, titled "Fuel Efficiency".
# The scatter trace states mode = "markers" explicitly so plotly does not
# have to guess the mode.
boxplot <- mtcars %>%
  plot_ly(x = ~factor(cyl), y = ~mpg) %>%
  add_trace(type = "scatter", mode = "markers", name = "scatter") %>%
  add_boxplot(name = "Boxplot") %>%
  layout(title = "Fuel Efficiency")
boxplot