dsabtblog - Tumblr blog

dsabtblog · 5 years ago

Text

Week 4 - Bivariate graph

Graph showing the association between explanatory and response variables

Median age is the explanatory (predictor) variable, and cellphones per 100 people is the response (outcome) variable.

Code

# -*- coding: utf-8 -*-
"""Week 4 - bivariate graph of median age and cellphones per 100 people."""

import matplotlib.pyplot as plt
import numpy
import pandas
import seaborn
# any additional libraries would be imported here

# Load the Gapminder extract; low_memory=False makes pandas read the file in
# one pass instead of inferring column types chunk by chunk.
data = pandas.read_csv("./gapminder2018year2.csv", low_memory=False)

# Set pandas to show all columns and all rows when a DataFrame is printed.
pandas.set_option("display.max_columns", None)
pandas.set_option("display.max_rows", None)

print(len(data))  # number of observations (rows)
print(len(data.columns))  # number of variables (columns)

data.info()

# Set the variables we will be working with to numeric.
for column in (
    "Cellphones100",
    "IncomePPP",
    "Population",
    "Lifeexpect",
    "median_age",
):
    data[column] = pandas.to_numeric(data[column])

# Counts and percentages (i.e. frequency distributions) for each variable.
# Counts pass dropna=False, so missing values get their own row; the
# percentages rely on value_counts' default and leave missing values out.

print("counts for Cellphones100 - Number of countries with this number of Cell phones per 100 people")
c1 = data['Cellphones100'].value_counts(sort=False, dropna=False)
print(c1)

print("percentage for Cellphones100 - Percentage of countries with this number of Cell phones per 100 people")
p1 = data['Cellphones100'].value_counts(sort=False, normalize=True)
print(p1)

print("counts for IncomePPP - Number of countries with this Income per Person per Year")
c2 = data['IncomePPP'].value_counts(sort=False, dropna=False)
print(c2)

print("percentage for IncomePPP - Percentage of countries with this Income per Person per Year")
p2 = data['IncomePPP'].value_counts(sort=False, normalize=True)
print(p2)

print("counts for Population - Number of countries with this Population")
c3 = data['Population'].value_counts(sort=False, dropna=False)
print(c3)

print("percentage for Population - Percentage of countries with this Population")
p3 = data['Population'].value_counts(sort=False, normalize=True)
print(p3)

print("counts for Life Expectancy- Number of countries with this Life expectancy")
c4 = data['Lifeexpect'].value_counts(sort=False, dropna=False)
print(c4)

print("percentage for Life Expectancy- Percentage of countries with this Life expectancy")
p4 = data['Lifeexpect'].value_counts(sort=False, normalize=True)
print(p4)

print("counts for Median Age - Number of countries with this Median Age")
c5 = data['median_age'].value_counts(sort=False, dropna=False)
print(c5)

print("percentage for Median Age - Percentage of countries with this Median Age")
p5 = data['median_age'].value_counts(sort=False, normalize=True)
print(p5)

# Keep only the rows whose median age lies between 14 and 49 (inclusive),
# then work on a copy so the new column is added to an independent frame.
sub1 = data[(data["median_age"] >= 14) & (data["median_age"] <= 49)]
sub2 = sub1.copy()

# Split median age into four equal-sized groups (quartiles).
print("median_age - 4 categories - quartiles")
sub2["median_agegroup4"] = pandas.qcut(
    sub2.median_age,
    4,
    labels=["1=25%tile", "2=50%tile", "3=75%tile", "4=100%tile"],
)
c9 = sub2["median_agegroup4"].value_counts(sort=False, dropna=True)
print(c9)

# Number of rows for each distinct Cellphones100 value in the subset.
c6 = sub2.groupby("Cellphones100").size()

# Print the group sizes just computed (not the earlier IncomePPP counts).
print(c6)

# Bin Cellphones100 into the intervals (15, 20], (20, 25], ... (35, 40];
# values outside (15, 40] become NaN.
# NOTE(review): these edges look like median-age bins rather than
# cellphones-per-100 bins - confirm the intended column.
sub2["Cellphones100"] = pandas.cut(sub2.Cellphones100, [15, 20, 25, 30, 35, 40])

# change format from numeric to categorical
# sub2['Lifeexpect'] = sub2['Lifeexpect'].astype("category")

# Summary statistics for the variable that is actually described: median age.
print("Describe Median Age")
desc3 = sub2['median_age'].describe()
print(desc3)

sub2['median_age'] = pandas.to_numeric(sub2['median_age'])

# Bivariate bar graph: one bar per Cellphones100 bin, bar height is the mean
# median age in that bin (catplot's default estimator); ci=None hides the
# error bars.
# NOTE(review): if median age is meant to be the explanatory variable, it
# belongs on the x axis (e.g. x="median_agegroup4" with a numeric
# Cellphones100 on y) - confirm the intended orientation.
seaborn.catplot(x="Cellphones100", y="median_age", data=sub2, kind="bar", ci=None)
# Axis labels follow the x/y arguments above.
plt.xlabel("Cellphones per 100")
plt.ylabel("Median Age")

0 notes

dsabtblog · 5 years ago

Text

Week 4 - Univariate graph

Univariate graph displaying a left-skewed, bimodal distribution relating cellphones per 100 individuals in a population to a country’s life expectancy. The graph suggests that countries with a higher life expectancy tend to have higher cellphone ownership among the population.

Graph

Code

# -*- coding: utf-8 -*- """ import pandas

import numpy