Did you know that you can navigate the posts by swiping left and right?

D3 Histogram

May 21, 2017, May 21, 2017 | Comments

category: VISUALIZATION
d3 html css javascript

One of D3's strengths is that it lets us display data visually and quickly. Starting with this post, I will go through how D3 can be used to turn data into several popular chart types, and discuss what these approaches have in common and where they differ. The D3 template introduced in the previous post is reused throughout.

<!DOCTYPE html>
<!-- Page skeleton used as the starting point: styles, the D3 v4 library, then the chart script. -->
<meta charset="utf-8">

<style>
/* Chart-specific CSS rules go here. */

</style>

<!-- Load D3 v4 before the inline script below that uses it. -->
<script src="https://d3js.org/d3.v4.min.js"></script>
<script>
// Chart-building JavaScript goes here.

</script>

1. Histogram

We first use D3’s random number generator (https://github.com/d3/d3-random) to generate a series of numbers from a standard normal distribution.

// d3.range(-100, 100, 2) only fixes the sample count (100 elements); each
// element is replaced by a fresh draw from the standard normal generator.
var sampleNormal = d3.randomNormal();
var data = d3.range(-100, 100, 2).map(function() { return sampleNormal(); });

And map the data generated from above to the x-axis which has the length of svg’s width.

// Linear x scale: the [min, max] extent of the samples is mapped onto
// rounded pixel positions between 0 and the drawable width.
var x = d3.scaleLinear()
        .rangeRound([0, width])
        .domain(d3.extent(data));

Theoretically speaking, the random numbers generated are continuous. To display them in a histogram, they must be binned, i.e. grouped into consecutive, non-overlapping intervals. In our example, the 100 random numbers are binned into roughly 10 groups (x.ticks(10) picks "nice" boundaries, so the exact number of bins can vary slightly).

// Configure a histogram generator with the x scale's tick values as bin
// boundaries and the x scale's domain as its extent, then bin the samples.
var bins = d3.histogram()
    .thresholds(x.ticks(10))
    .domain(x.domain())(data);

Next, the y-axis is defined to show the count of each bin, i.e. how many of the random numbers fall within its range. Similar to the x-axis, the length of the y-axis is set to the chart height.

// Linear y scale from 0 to the size of the fullest bin. The range runs from
// `height` down to 0, so larger counts map to smaller y coordinates.
var y = d3.scaleLinear()
    .range([height, 0])
    .domain([0, d3.max(bins, function(bin) { return bin.length; })]);

Finally, after generating pseudo-random data, mapping it onto the x-axis, deciding the number of histogram bins, and setting up the y-axis, the histogram is ready to be rendered into the svg.

// One <g class="bar"> per bin, moved to the bin's left edge (x) and to the
// top of its bar (y). The selector is ".bar" (class); a bare "bar" would look
// for <bar> elements and never match the groups created here.
var bar = g.selectAll(".bar")
          .data(bins)
          .enter().append("g")
          .attr("class", "bar")
          .attr("transform", function(d) { return "translate(" + x(d.x0) + "," + y(d.length) + ")"; });

// The width is computed per bin: the x domain is [min, max] of the data, so
// the first and last bins are clipped to that extent and are usually narrower
// than the interior bins. The 1px subtraction leaves a gap between bars.
bar.append("rect")
    .attr("x", 0)
    .attr("width", function(d) { return Math.max(0, x(d.x1) - x(d.x0) - 1); })
    .attr("height", function(d) { return height - y(d.length); });

Here is the full code:

<style>
/* Let every svg stretch to the size of its container. */
svg {
    width: 100%;
    height: 100%;
}

/* Grey backdrop for the random-data histogram. */
#hist_eg {
   background-color: lightgrey;
}

/* Bars are blue and turn yellow while hovered. */
.bar rect{
  fill: steelblue;
}

.bar rect:hover {
  fill: yellow;
}

/* Count label drawn inside each bar. */
.bar text {
  fill: white;
  font: 10px sans-serif;
}
</style>

<svg id="hist_eg" width="950" height="500"></svg>

<script src="https://d3js.org/d3.v4.min.js"></script>
<script>
// Histogram of 100 pseudo-random, normally distributed samples drawn into #hist_eg.

// Drawing-area geometry; the chart group below is shifted by the left/top margins.
var margin = {top: 30, right: 30, bottom: 30, left: 30};
var width = document.getElementById("hist_eg").getBoundingClientRect().width-100;
var height = 400;

// d3.range only fixes the sample count; every element is replaced by a random draw.
var data = d3.range(-100,100,2)
           .map(d3.randomNormal());

// Group that holds the whole chart.
var g = d3.select("#hist_eg")
        .append("g")
        .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// x scale: data extent -> rounded pixel positions across the drawable width.
var x = d3.scaleLinear()
        .domain([d3.min(data),d3.max(data)])
        .rangeRound([0, width]);

// Bin the samples, using the x scale's tick values as bin boundaries.
var bins = d3.histogram()
    .domain(x.domain())
    .thresholds(x.ticks(10))(data);

// y scale: bin counts -> pixels; the range is [height, 0] so larger counts sit higher.
var y = d3.scaleLinear()
    .domain([0, d3.max(bins, function(d) { return d.length; })])
    .range([height, 0]);

// One <g class="bar"> per bin, moved to the bin's left edge and the bar's top.
// The selector is ".bar" (class); a bare "bar" would look for <bar> elements.
var bar = g.selectAll(".bar")
          .data(bins)
          .enter().append("g")
          .attr("class", "bar")
          .attr("transform", function(d) { return "translate(" + x(d.x0) + "," + y(d.length) + ")"; });

// Width is computed per bin: the first and last bins are clipped to the data
// extent and are usually narrower than the interior bins, so the width of
// bins[0] must not be reused for every bar. The 1px subtraction leaves a gap.
bar.append("rect")
    .attr("x", 0)
    .attr("width", function(d) { return Math.max(0, x(d.x1) - x(d.x0) - 1); })
    .attr("height", function(d) { return height - y(d.length); });

// Count label, centred horizontally within its own bin.
bar.append("text")
    .attr("dy", ".75em")
    .attr("y", 6)
    .attr("x", function(d) { return (x(d.x1) - x(d.x0)) / 2; })
    .attr("text-anchor", "middle")
    .text(function(d) { return d3.format(",.0f")(d.length); });

// x axis along the bottom edge of the drawing area.
g.append("g")
    .attr("class", "axis axis--x")
    .attr("transform", "translate(0," + height + ")")
    .call(d3.axisBottom(x));

// y axis at the group's origin. No transform is needed: the y scale's range
// already spans [height, 0] in this group's coordinate system.
g.append("g")
    .attr("class", "axis axis--y")
    .call(d3.axisLeft(y));

</script>  

2. A Real Histogram Example

With the basics of how D3 builds a histogram covered in section 1, we can move on to turning real-world data into a meaningful chart. I downloaded data from https://www.data.gov/ that shows the sex and age distribution of the American population in 2015. Part of the data looks like:

Age Group Male Female
Under 5 years 0.064 0.059
5 to 9 years 0.066 0.062
10 to 14 years 0.067 0.062
15 to 19 years 0.069 0.064
20 to 24 years 0.073 0.067
25 to 29 years 0.071 0.067
30 to 34 years 0.069 0.066
35 to 39 years 0.065 0.063
40 to 44 years 0.064 0.063
45 to 49 years 0.065 0.064
50 to 54 years 0.069 0.07
55 to 59 years 0.066 0.068
60 to 64 years 0.058 0.062
65 to 69 years 0.048 0.052
70 to 74 years 0.034 0.038
75 to 79 years 0.023 0.028
80 to 84 years 0.015 0.021
85 years and over 0.013 0.025

The goal here is to draw a chart in which each bar shows the Male or Female statistic, grouped by Age Group. Unlike the example above, the data is not generated by d3.randomNormal(); instead, it is read from a .csv file by calling d3.csv("datasource"). Also, since each bar represents one gender and the male and female bars of the same age group are placed together, we need two scales: one that positions the groups relative to each other and one that positions the bars within a group.

x0 is used to specify the padding between each group and later it is called to decide the width of each group which is equally divided by the number of groups.

// Outer band scale: lays the groups out across [0, width] on rounded pixel
// positions, with an inner padding of 0.2 between neighbouring groups.
var x0 = d3.scaleBand()
    .paddingInner(0.2)
    .rangeRound([0, width]);
    
.attr("transform", function(d) { return "translate(" + x0(d.Age_Group) + ",0)"; })

x1 is used to specify the padding between each bin within the same group and it is called afterwards to decide the width of each bin given the width of each group and padding between each bin.

// Inner band scale for the bars of one group; only the padding is set here.
var x1 = d3.scaleBand().padding(0.1);
    
.attr("width", x1.bandwidth())

A variable keys is created to read the bar (series) names from the data source.

// Every column name except the first one; each remaining column is one series.
var keys = data.columns.slice(1);

The x-axis and y-axis are rendered the same way as in the example above. The labels of the x-axis are created from x0, which holds the group information, and are rotated counterclockwise by 45 degrees.

// Bottom axis driven by the group scale x0, placed at the bottom of the chart.
var xAxisGroup = g.append("g")
    .attr("class", "axis")
    .attr("transform", "translate(0," + height + ")")
    .call(d3.axisBottom(x0));

// Tilt the tick labels by -45 degrees and anchor them at their end.
xAxisGroup.selectAll("text")
    .attr("transform", "rotate(-45)" )
    .attr("dx", "-.1em")
    .style("text-anchor", "end");

The legend is created from the variable keys, which contains the series names read from the data.

.data(keys.slice())   

Here is the full code:

<style>
/* Let every svg stretch to the size of its container. */
svg {
    width: 100%;
    height: 100%;
}

/* Grey backdrop for the grouped chart. */
#hist_sexage {
   background-color: lightgrey;
}

/* Highlight the bar under the pointer. */
.rect:hover {
   fill: yellow;
}
</style>

<svg id="hist_sexage" width="950" height="500"></svg>

<script src="https://d3js.org/d3.v4.min.js"></script>
<script>
// Grouped bar chart of the age-by-gender CSV, drawn into #hist_sexage.
//
// Wrapped in an IIFE so the chart's variables stay private: d3.csv is
// asynchronous, and another script on the page that reuses global names such
// as `y`, `z` or `width` could otherwise replace them before the callback runs.
(function() {
  "use strict";

  var margin = {top: 20, right: 30, bottom: 50, left: 30};
  var width = document.getElementById("hist_sexage").getBoundingClientRect().width-50;
  var height = 400;

  // Group that holds the whole chart, shifted by the left/top margins.
  var g = d3.select("#hist_sexage")
      .append("g")
      .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

  // x0 positions each age group; x1 positions the bars inside one group.
  var x0 = d3.scaleBand()
      .rangeRound([0, width])
      .paddingInner(0.2);

  var x1 = d3.scaleBand()
      .padding(0.1);

  var y = d3.scaleLinear()
      .rangeRound([height, 0]);

  // One colour per series.
  var z = d3.scaleOrdinal()
      .range(["cornflowerblue", "orangered"]);

  // Tooltip values use a percent format, like the y axis ticks.
  var formatPercent = d3.format(".1%");

  // Row converter: turns every column except the first into a number.
  function toNumbers(d, i, columns) {
    for (var c = 1, n = columns.length; c < n; ++c) d[columns[c]] = +d[columns[c]];
    return d;
  }

  d3.csv("/blog/data/age_by_gender.csv", toNumbers, function(error, data) {
    if (error) throw error;

    // Created at the end of this callback so it is drawn above everything
    // else; the handlers below only touch it once the user hovers a bar.
    var tooltip;

    var keys = data.columns.slice(1);
    x0.domain(data.map(function(d) { return d.Age_Group; }));
    x1.domain(keys).rangeRound([0, x0.bandwidth()]);
    y.domain([0, d3.max(data, function(d) { return d3.max(keys, function(key) { return d[key]; }); })]).nice();
    // Pin the colour assignment to the series order instead of first-use order.
    z.domain(keys);

    // One <g> per age group, one <rect> per series inside it.
    g.append("g")
      .selectAll("g")
      .data(data)
      .enter().append("g")
      .attr("transform", function(d) { return "translate(" + x0(d.Age_Group) + ",0)"; })
      .selectAll("rect")
      .data(function(d) { return keys.map(function(key) { return {key: key, value: d[key]}; }); })
      .enter().append("rect").attr("class", "rect")
      .attr("x", function(d) { return x1(d.key); })
      .attr("y", function(d) { return y(d.value); })
      .attr("width", x1.bandwidth())
      .attr("height", function(d) { return height - y(d.value); })
      .attr("fill", function(d) { return z(d.key); })
      .on("mouseover", function() { tooltip.style("display", null); })
      .on("mousemove", function(d) {
        // Measure the pointer relative to `g`, the tooltip's parent. Measuring
        // relative to the bar would ignore the translation of its age group.
        var pointer = d3.mouse(g.node());
        tooltip
          .attr("transform", "translate(" + (pointer[0] + 10) + "," + (pointer[1] - 5) + ")")
          .select("text").text(formatPercent(d.value));
      })
      .on("mouseout", function() { tooltip.style("display", "none"); });

    // x axis with labels rotated by -45 degrees.
    g.append("g")
      .attr("class", "axis")
      .attr("transform", "translate(0," + height + ")")
      .call(d3.axisBottom(x0))
      .selectAll("text")
      .style("text-anchor", "end")
      .attr("dx", "-.1em")
      .attr("transform", "rotate(-45)" );

    // y axis with percent ticks and a "Percentage" caption next to the top tick.
    g.append("g")
      .attr("class", "axis")
      .call(d3.axisLeft(y).ticks(null, ".0%"))
      .append("text")
      .attr("x", 2)
      .attr("y", y(y.ticks().pop()) + 0.5)
      .attr("dy", "0.32em")
      .attr("fill", "#000")
      .attr("font-weight", "bold")
      .attr("text-anchor", "start")
      .text("Percentage");

    // Legend: one row (colour swatch + series name) per key, right-aligned.
    var legend = g.append("g")
        .attr("font-family", "sans-serif")
        .attr("font-size", 10)
        .attr("text-anchor", "end")
        .selectAll("g")
        .data(keys.slice())
        .enter().append("g")
        .attr("transform", function(d, i) { return "translate(0," + i * 20 + ")"; });

    legend.append("rect")
        .attr("x", width - 19)
        .attr("width", 19)
        .attr("height", 19)
        .attr("fill", z);

    legend.append("text")
        .attr("x", width - 24)
        .attr("y", 9.5)
        .attr("dy", "0.32em")
        .text(function(d) { return d; });

    // Hidden until a bar is hovered. pointer-events is disabled so the tooltip
    // itself never intercepts the mouse and triggers a mouseout on the bar.
    tooltip = g.append("g")
        .attr("class", "tooltip")
        .style("display", "none")
        .style("pointer-events", "none");

    tooltip.append("text")
        .attr("font-family", "sans-serif")
        .attr("font-size", 12)
        .attr("font-weight", "bold");
  });
})();
</script>

3. Stacked Bar Chart - A Twist from Histogram

Using exactly the same data above, we can twist the code a bit to draw a stacked bar chart which is essentially another type of histogram that each bin is the age information which consists of both genders.

Unlike the example above, the width of each bar is decided only by the total width of the svg and the padding between bars.

// Single band scale for the stacked chart: one band per age group across
// [0, width] on rounded pixel positions, inner padding 0.2, alignment 0.9.
var x = d3.scaleBand()
    .align(0.9)
    .paddingInner(0.2)
    .rangeRound([0, width]);

To stack the male and female bars on top of each other within the same age bin, the d3.stack() function is called.

.data(d3.stack().keys(keys)(data))

This is almost the only major difference between a grouped histogram and a stacked bar chart. All the rest parts stay about the same.

Here is the full code:

<style>
/* Let every svg stretch to the size of its container. */
svg {
    width: 100%;
    height: 100%;
}

/* Grey backdrop for the stacked chart. */
#stack_sexage {
   background-color: lightgrey;
}

/* Highlight the segment under the pointer. */
.rect:hover {
    fill: yellow;
}
</style>

<svg id="stack_sexage" width="950" height="500"></svg>

<script src="https://d3js.org/d3.v4.min.js"></script>
<script>
// Stacked bar chart of the age-by-gender CSV, drawn into #stack_sexage.
//
// Wrapped in an IIFE so the chart's variables stay private: d3.csv is
// asynchronous, and sharing global names such as `y`, `z` or `width` with
// other scripts on the page would let one chart overwrite another's state.
(function() {
  "use strict";

  var margin = {top: 20, right: 30, bottom: 50, left: 30};
  // Measure this chart's own svg rather than the svg of a different chart.
  var width = document.getElementById("stack_sexage").getBoundingClientRect().width-50;
  var height = 400;

  // Group that holds the whole chart, shifted by the left/top margins.
  var g1 = d3.select("#stack_sexage")
          .append("g")
          .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

  // One band per age group.
  var x = d3.scaleBand()
      .rangeRound([0, width])
      .paddingInner(0.2)
      .align(0.9);

  var y = d3.scaleLinear()
      .rangeRound([height, 0]);

  // One colour per series.
  var z = d3.scaleOrdinal()
      .range(["cornflowerblue", "orangered"]);

  // Row converter: turns every column except the first into a number and
  // stores the row sum in d.total (the height of the finished stack).
  function toNumbersWithTotal(d, i, columns) {
    var total = 0;
    for (var c = 1; c < columns.length; ++c) total += d[columns[c]] = +d[columns[c]];
    d.total = total;
    return d;
  }

  d3.csv("/blog/data/age_by_gender.csv", toNumbersWithTotal, function(error, data) {
    if (error) throw error;
    var keys = data.columns.slice(1);
    x.domain(data.map(function(d) { return d.Age_Group; }));
    y.domain([0, d3.max(data, function(d) { return d.total; })]).nice();
    z.domain(keys);

    // d3.stack yields one series per key; each series element is a
    // [lower, upper] pair and keeps the source row in its `data` property.
    g1.append("g")
      .selectAll("g")
      .data(d3.stack().keys(keys)(data))
      .enter().append("g")
      .attr("fill", function(d) { return z(d.key); })
      .selectAll("rect")
      .data(function(d) { return d; })
      .enter().append("rect").attr("class", "rect")
        .attr("x", function(d) { return x(d.data.Age_Group); })
        .attr("y", function(d) { return y(d[1]); })
        .attr("height", function(d) { return y(d[0]) - y(d[1]); })
        .attr("width", x.bandwidth());

    // x axis with labels rotated by -45 degrees.
    g1.append("g")
      .attr("class", "axis")
      .attr("transform", "translate(0," + height + ")")
      .call(d3.axisBottom(x))
      .selectAll("text")
      .style("text-anchor", "end")
      .attr("dx", "-.1em")
      .attr("transform", "rotate(-45)" );

    // y axis with percent ticks and a "Percentage" caption next to the top tick.
    g1.append("g")
      .attr("class", "axis")
      .call(d3.axisLeft(y).ticks(null, ".0%"))
      .append("text")
      .attr("x", 2)
      .attr("y", y(y.ticks().pop()) + 0.5)
      .attr("dy", "0.32em")
      .attr("fill", "#000")
      .attr("font-weight", "bold")
      .attr("text-anchor", "start")
      .text("Percentage");

    // Legend: one row (colour swatch + series name) per key, right-aligned.
    var legend = g1.append("g")
        .attr("font-family", "sans-serif")
        .attr("font-size", 10)
        .attr("text-anchor", "end")
        .selectAll("g")
        .data(keys.slice())
        .enter().append("g")
        .attr("transform", function(d, i) { return "translate(0," + i * 20 + ")"; });

    legend.append("rect")
        .attr("x", width - 19)
        .attr("width", 19)
        .attr("height", 19)
        .attr("fill", z);

    legend.append("text")
        .attr("x", width - 24)
        .attr("y", 9.5)
        .attr("dy", "0.32em")
        .text(function(d) { return d; });

  });
})();
</script>  


Reference:

(1). d3-3.x-api-reference, https://github.com/d3/d3-3.x-api-reference/blob/master/API-Reference.md.
(2). D3 API Reference, https://github.com/d3/d3/blob/master/API.md.