DS Week 5

Data Science for Social Science Research

Najah

Ashoka University

RMarkdown up and running?

https://www.gapminder.org/tools

Gapminder Replication

ggplot Visual

Code

# Complete Gapminder-style bubble chart built from gm_07: GDP per capita on a
# log-transformed x axis against life expectancy, with points filled by
# continent and sized by population.
ggplot(
  gm_07,
  aes(x = gdpPercap, y = lifeExp, fill = continent, size = pop)
) +
  geom_point(alpha = 0.5, shape = 21, color = "black") +
  # scale_x_log10() is left disabled: scale_x_continuous() only supplies the
  # breaks and dollar labels, and coord_trans() applies the log10 transform.
  scale_x_continuous(
    labels = scales::dollar_format(),
    breaks = scales::log_breaks(n = 10)
  ) +
  coord_trans(x = "log10") +
  scale_size_continuous(
    # Show population in millions (e.g. 1e8 is labelled "100m").
    labels = scales::number_format(scale = 1e-6, suffix = "m"),
    breaks = seq(1e8, 1e9, 2e8),
    range = c(1, 20)
  ) +
  labs(
    title = "An Example of Hans Rosling's Gapminder using ggplot",
    x = "GDP per Capita (log scale)",
    y = "Life Expectancy at Birth",
    size = "Population",
    # continent is mapped to `fill` (not `color`), so the legend title that
    # has to be blanked is the fill one.
    fill = NULL,
    caption = "Source: Gapminder"
  ) +
  theme_bw() +
  # Large, faint year label drawn on the panel.
  annotate(
    "text",
    x = 3000, y = 60, hjust = 0.5,
    size = 40, color = "#999999",
    label = "2007", alpha = .3,
    family = "Helvetica Neue"
  )

Visualizing step by step

Data

# Install gapminder only when it is missing, so re-running this chunk does not
# re-download the package every time.
if (!requireNamespace("gapminder", quietly = TRUE)) {
  install.packages("gapminder")
}

library(gapminder)

head(gapminder)

# Filter the data to 2007.
# NOTE(review): `%>%` and `filter()` are not attached in this chunk; presumably
# dplyr (or tidyverse) is loaded elsewhere in the document -- confirm.
gm_07 <- gapminder %>% filter(year == 2007)

1 Start with the dataframe

# Step 1: an empty plot object that only knows about the data frame.
g <- ggplot(data = gm_07)
g

1.1 Map X & Y axis

# Step 1.1: declare the aesthetic mappings -- position from GDP per capita and
# life expectancy, fill from continent, point size from population.
g1 <- ggplot(
  data = gm_07,
  mapping = aes(
    x = gdpPercap,
    y = lifeExp,
    fill = continent,
    size = pop
  )
)
g1

1.2 Represent each observation with a point

# Step 1.2: add a point layer on top of g1 so each row of the data is drawn as
# a semi-transparent point with a black outline.
g2 <- g1 + geom_point(shape = 21, color = "black", alpha = 0.5)

g2

1.3 Transform the co-ordinates

# Step 1.3: apply a log10 transformation to the x coordinate.
g3 <- g2 + coord_trans(x = "log10")

g3

1.4 Scale the x axis

Add dollar sign

Specify the log breaks

# Step 1.4: configure the x scale -- log-spaced breaks, dollar-formatted labels.
g4 <- g3 +
  scale_x_continuous(
    breaks = scales::log_breaks(n = 10),
    labels = scales::dollar_format()
  )

g4

1.5 Specify the breaks and ranges

# Step 1.5: configure the size scale -- legend breaks every 200 million starting
# at 100 million, labels expressed in millions, and the allowed point-size range.
g5 <- g4 +
  scale_size_continuous(
    range = c(1, 20),
    breaks = seq(1e8, 1e9, 2e8),
    labels = scales::number_format(scale = 1e-6, suffix = "m")
  )

g5

1.6 Add labels

# Step 1.6: add the title, axis labels, legend titles and caption.
g6 <- g5 +
  labs(
    title = "An Example of Hans Rosling's Gapminder using ggplot",
    x = "GDP per Capita (log scale)",
    y = "Life Expectancy at Birth",
    size = "Population",
    # continent is mapped to `fill` in g1 (not `color`), so the legend title
    # that has to be blanked is the fill one.
    fill = NULL,
    caption = "Source: Gapminder"
  )

g6

1.7 Add a theme

# Step 1.7: switch to the black-and-white theme.
g7 <- g6 +
  theme_bw()
g7

1.8 Annotate

# Step 1.8: draw a large, faint "2007" text label on the panel.
g8 <- g7 +
  annotate(
    geom = "text",
    label = "2007",
    x = 3000,
    y = 60,
    hjust = 0.5,
    size = 40,
    color = "#999999",
    alpha = .3,
    family = "Helvetica Neue"
  )

g8

Final output

# Display the finished plot (g8 is the last object built in the steps above).
g8