US Census in 2014
US Census Data Analysis (Census Data 2014)¶
In [9]:
# Bar chart: total vs. citizen population per age (values are in thousands).
# NOTE(review): this cell relies on `plt` and `df_vote`, which the notebook only
# imports/builds in later cells, and it repeats the In [9] cell further down.
# It looks like a preview excerpt -- confirm before running top-to-bottom.
population_cols = ['Total Population', 'Total Citizen Population']
ax = df_vote.plot(kind='bar', x='Age', y=population_cols, figsize=(16, 5))
ax.set(ylabel='Population (K)')
plt.show()
Import libraries¶
In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
Data Load¶
About Dataset¶
- Population figures are given in thousands (K)
In [2]:
# Comma-separated source file; header=1 takes the column names from the
# second line of the file and skips the line above it.
path_data = 'data/reported_voting_and_registration_both_by_age_2014.csv'
df_vote = pd.read_csv(path_data, header=1)
In [3]:
# Preview the first rows of the frame exactly as it was parsed from the file.
df_vote.head()
Out[3]:
Data Cleansing¶
In [4]:
# List the column labels as parsed, before any renaming or dropping.
df_vote.columns
Out[4]:
In [5]:
# Give the first two unnamed columns meaningful names and remove the trailing
# ' 1' / ' 2' suffixes from the two "No response" headers.
column_renames = {
    'Unnamed: 0': 'Age',
    'Unnamed: 1': 'Total Population',
    'No response to registration 1': 'No response to registration',
    'No response to voting 2': 'No response to voting',
}
df_vote.rename(columns=column_renames, inplace=True)
# Any column still labelled 'Unnamed...' is discarded (per the original note,
# those columns contain the percentage figures).
kept_columns = [name for name in df_vote.columns if not name.startswith('Unnamed')]
df_vote = df_vote[kept_columns]
df_vote.head()
Out[5]:
In [6]:
# Remove the row labelled 0 and the two right-most columns.
# NOTE(review): not re-runnable as written -- a second run raises KeyError for
# the missing row label 0, and the positional column selection would trim two
# more columns each time.
trailing_cols = df_vote.columns[-2:]
df_vote = df_vote.drop(index=0).drop(columns=trailing_cols)
df_vote.head()
Out[6]:
In [7]:
# Reduce every 'Age' label to its first run of digits.
# - raw string: '\d' in a plain string literal is an invalid escape sequence
# - expand=False: return a Series rather than a one-column DataFrame
# - astype(str): keeps the cell re-runnable once 'Age' is already numeric
age_digits = df_vote['Age'].astype(str).str.extract(r'(\d+)', expand=False)
df_vote = df_vote.assign(Age=age_digits)

# Strip thousands separators and cast every column to int.
# The frame is rebound instead of assigning through `.loc[:, col] = ...`:
# on recent pandas that form writes into the existing object-dtype column,
# so the values would stay non-numeric and the plots below would have no
# numeric data to draw.
df_vote = df_vote.replace(',', '', regex=True).astype(int)
df_vote.head()
Out[7]:
Visualization for Data Understanding¶
In [8]:
# List the column labels that remain after the cleansing cells above.
df_vote.columns
Out[8]:
In [9]:
# Bar chart: total vs. citizen population per age (values are in thousands).
population_cols = ['Total Population', 'Total Citizen Population']
ax = df_vote.plot(kind='bar', x='Age', y=population_cols, figsize=(16, 5))
ax.set(ylabel='Population (K)')
plt.show()
In [10]:
# Bar chart: registration status counts per age (values are in thousands).
registration_cols = [
    'Reported registered',
    'Reported not registered',
    'No response to registration',
]
ax = df_vote.plot(kind='bar', x='Age', y=registration_cols, figsize=(16, 5))
ax.set(ylabel='Population (K)')
plt.show()
In [11]:
# Bar chart: voting status counts per age (values are in thousands).
voting_cols = [
    'Reported voted',
    'Reported did not vote',
    'No response to voting',
]
ax = df_vote.plot(kind='bar', x='Age', y=voting_cols, figsize=(16, 5))
ax.set(ylabel='Population (K)')
plt.show()
Ratio of citizenship / registration / voting¶
In [12]:
# Line plot: citizen population divided by total population, per age.
citizen_ratio = df_vote['Total Citizen Population'] / df_vote['Total Population']
plt.figure(figsize=(16, 5))
plt.plot(df_vote['Age'], citizen_ratio, label='Citizen Ratio')
plt.ylabel('Ratio')
plt.legend()
plt.show()
In [13]:
# Line plot: each registration status divided by total population, per age.
# (column in df_vote, legend label)
registration_series = [
    ('Reported registered', 'Registered'),
    ('Reported not registered', 'Not registered'),
    ('No response to registration', 'No response to registration'),
]
plt.figure(figsize=(16, 5))
for column, label in registration_series:
    plt.plot(df_vote['Age'], df_vote[column] / df_vote['Total Population'], label=label)
legend = plt.legend()
plt.ylabel('Ratio')
plt.show()
In [14]:
# Line plot: each voting status divided by total population, per age.
# (column in df_vote, legend label)
# The third legend label previously read 'No respond to voting'; it now matches
# the column name and the wording of the registration plot's legend.
voting_series = [
    ('Reported voted', 'Voted'),
    ('Reported did not vote', 'Not voted'),
    ('No response to voting', 'No response to voting'),
]
plt.figure(figsize=(16, 5))
for column, label in voting_series:
    plt.plot(df_vote['Age'], df_vote[column] / df_vote['Total Population'], label=label)
legend = plt.legend()
plt.ylabel('Ratio')
plt.show()
In [ ]:
Comments
Comments powered by Disqus