Don't wanna be here? Send us removal request.
Text
Data management and visualization, week 4
1. Program
# Week 4 setup: bring in the analysis/plotting libraries, load the AddHealth
# extract, and relax the pandas display limits used by the prints below.
import pandas
import numpy
import seaborn
import matplotlib.pyplot as plt

data = pandas.read_csv('addhealth_pds.csv', low_memory=False)

# Show every column and every row when a frame or series is printed.
for display_limit in ('display.max_columns', 'display.max_rows'):
    pandas.set_option(display_limit, None)

# Bug fix for display formats to avoid run time errors: print floats with
# plain '%f' formatting.
pandas.set_option('display.float_format', lambda value: '%f' % value)
# Recode the non-substantive answer codes to python missing (NaN) so they
# drop out of the frequency tables and bar charts.
# Code 8 must be listed for H1ID1O as well as H1ID1Q: leaving it out keeps a
# stray third category (8) in the H1ID1O describe/groupby output and plot.
missing_codes = {
    "H1RE3": [6, 7, 8],
    "H1RE6": [6, 7, 8],
    "H1ID1O": [6, 8, 9],
    "H1ID1Q": [6, 8, 9],
}
for column, codes in missing_codes.items():
    # A list-valued replace swaps every listed code for NaN in one pass.
    data[column] = data[column].replace(codes, numpy.nan)
# Secondary variables: map the ordinal answer codes of H1RE3 (religious
# service attendance) and H1RE6 (prayer) onto numeric frequencies.
# USATTDY / USPRAYY are times per year; USATTD / USPRAY use a smaller scale
# (presumably times per month -- confirm against the codebook).
# Series.map leaves any code that is not a key of the dict as NaN.
recode1 = {1: 4, 2: 2, 3: 1, 4: 0}
data["USATTD"] = data["H1RE3"].map(recode1)

recode2 = {1: 48, 2: 24, 3: 12, 4: 0}
data["USATTDY"] = data["H1RE3"].map(recode2)

print('counts for USATTDY')
c5 = data["USATTDY"].value_counts(sort=False, dropna=False)
print(c5)

print('percentages for USATTDY')
p5 = data["USATTDY"].value_counts(sort=False, normalize=True)
print(p5)

# NOTE(review): H1RE6 also contains code 5 (440 rows in the posted counts),
# which is not a key of recode3/recode4 and therefore becomes NaN in
# USPRAY/USPRAYY -- confirm that dropping those respondents is intended.
recode3 = {1: 30, 2: 4, 3: 1, 4: 0}
data["USPRAY"] = data["H1RE6"].map(recode3)

recode4 = {1: 365, 2: 48, 3: 12, 4: 0}
data["USPRAYY"] = data["H1RE6"].map(recode4)

print('counts for USPRAYY')
c6 = data["USPRAYY"].value_counts(sort=False, dropna=False)
# The table has to be printed, otherwise the header above stands alone.
print(c6)
# Univariate bar graph for a categorical variable.
# First change the format from numeric to categorical so that every distinct
# value is treated as its own category.
sub2 = data.copy()
sub2["USPRAYY"] = sub2["USPRAYY"].astype('category')

seaborn.countplot(data=sub2, x="USPRAYY")
plt.xlabel('Types of frequency prays')
plt.title('Types of frequency prays')

print('describe number of prays')
desc1 = sub2["USPRAYY"].describe()
print(desc1)

# Number of respondents in each category (rows with NaN are not counted).
c1 = sub2.groupby("USPRAYY").size()
print(c1)

print('mode')
mode1 = sub2["USPRAYY"].mode()
print(mode1)

# Category sizes as a percentage of the full sample; missing answers stay in
# the denominator, so the shares need not add up to 100.
p1 = c1 * 100 / len(data)
print(p1)
# Univariate analysis of H1ID1Q ("We would get married").
sub2["H1ID1Q"] = sub2["H1ID1Q"].astype('category')

# Start a fresh figure so the bars are not drawn on top of whatever plot is
# currently active.
plt.figure()
seaborn.countplot(x="H1ID1Q", data=sub2)
plt.xlabel('Possibility of marriage')
plt.title('Possibility of marriage')

print('describe possibility of marriage')
desc2 = sub2["H1ID1Q"].describe()
print(desc2)

# Number of respondents per answer; print this variable's own table (c2).
c2 = sub2.groupby("H1ID1Q").size()
print(c2)

print('mode')
mode2 = sub2["H1ID1Q"].mode()
print(mode2)

# Answer shares as a percentage of the full sample (missing answers stay in
# the denominator).
p2 = c2 * 100 / len(data)
print(p2)
# Univariate analysis of H1ID1O ("We would have sex").
sub2["H1ID1O"] = sub2["H1ID1O"].astype('category')

# Start a fresh figure so the bars are not drawn on top of whatever plot is
# currently active.
plt.figure()
seaborn.countplot(x="H1ID1O", data=sub2)
plt.xlabel('Possibility of sex')
plt.title('Possibility of sex')

print('describe possibility of sex')
desc3 = sub2["H1ID1O"].describe()
print(desc3)

# Number of respondents per answer (rows with NaN are not counted).
c3 = sub2.groupby("H1ID1O").size()
print(c3)

print('mode')
mode3 = sub2["H1ID1O"].mode()
print(mode3)

# Answer shares as a percentage of the full sample (missing answers stay in
# the denominator).
p3 = c3 * 100 / len(data)
print(p3)
2. Outcome
counts for USATTDY
48.000000 2567
nan 894
12.000000 1105
0.000000 664
24.000000 1274
Name: USATTDY, dtype: int64
percentages for USATTDY
48.000000 0.457576
12.000000 0.196970
0.000000 0.118360
24.000000 0.227094
Name: USATTDY, dtype: float64
counts for USPRAYY
describe number of prays
count 5174.000000
unique 4.000000
top 365.000000
freq 2722.000000
Name: USPRAYY, dtype: float64
USPRAYY
0.000000 496
12.000000 586
48.000000 1370
365.000000 2722
dtype: int64
mode
0 365.000000
Name: USPRAYY, dtype: category
Categories (4, float64): [0.000000, 12.000000, 48.000000, 365.000000]
USPRAYY
0.000000 7.626076
12.000000 9.009840
48.000000 21.063961
365.000000 41.851169
dtype: float64
mode
0 2.000000
Name: H1ID1Q, dtype: category
Categories (2, float64): [1.000000, 2.000000]
H1ID1Q
1.000000 22.893604
2.000000 75.199877
dtype: float64
count 6380.000000
unique 2.000000
top 2.000000
freq 4891.000000
Name: H1ID1Q, dtype: float64
count 5174.000000
unique 4.000000
top 365.000000
freq 2722.000000
Name: USPRAYY, dtype: float64
Name: H1ID1Q, dtype: float64
describe possibility of sex
count 6436.000000
unique 3.000000
top 2.000000
freq 3533.000000
Name: H1ID1O, dtype: float64
H1ID1O
1.000000 2847
2.000000 3533
8.000000 56
dtype: int64
mode
0 2.000000
Name: H1ID1O, dtype: category
Categories (3, float64): [1.000000, 2.000000, 8.000000]
H1ID1O
1.000000 43.773063
2.000000 54.320418
8.000000 0.861009
dtype: float64
count 6436.000000
unique 3.000000
top 2.000000
freq 3533.000000
Name: H1ID1O, dtype: float64
0 notes
Text
DATA MANAGEMENT, WEEK3
1. PROGRAM
import pandas
import numpy
# Load the AddHealth extract and take the raw frequency table of each of the
# four studied variables before any recoding.
data = pandas.read_csv('addhealth_pds.csv', low_memory=False)

studied_columns = ("H1RE3", "H1RE6", "H1ID1O", "H1ID1Q")

# All four headers are emitted first ...
for column in studied_columns:
    print('counts for original ' + column)

c1, c2, c3, c4 = (
    data[column].value_counts(sort=False) for column in studied_columns
)

# ... followed by the four tables, in the same variable order.
for table in (c1, c2, c3, c4):
    print(table)
# Set missing data to NaN: each variable has three answer codes that are
# replaced by numpy.nan.
for column, codes in (
    ("H1RE3", [6, 7, 8]),
    ("H1RE6", [6, 7, 8]),
    ("H1ID1O", [6, 8, 9]),
    ("H1ID1Q", [6, 8, 9]),
):
    data[column] = data[column].replace(codes, numpy.nan)
# Frequency tables after the recode; dropna=False keeps the NaN bucket so the
# number of answers set to missing is visible.
nan_reports = (
    ("H1RE3", 'counts for H1RE3 with 6,7 and 8 set to nan'),
    ("H1RE6", 'counts for H1RE6 with 6,7 and 8 set to nan'),
    ("H1ID1O", 'counts for H1ID1O with 6, 8 and 9 set to nan'),
    ("H1ID1Q", 'counts for H1ID1Q with 6, 8 and 9 set to nan'),
)

# All four headers are emitted first ...
for _, heading in nan_reports:
    print(heading)

p1, p2, p3, p4 = (
    data[column].value_counts(sort=False, dropna=False)
    for column, _ in nan_reports
)

# ... followed by the four tables, in the same variable order.
for table in (p1, p2, p3, p4):
    print(table)
def _frequency_report(column):
    """Print the counts (NaN included) and the shares of data[column].

    Returns the (counts, shares) pair so the caller can keep both tables.
    """
    print('counts for ' + column)
    counts = data[column].value_counts(sort=False, dropna=False)
    print(counts)
    print('percentages for ' + column)
    shares = data[column].value_counts(sort=False, normalize=True)
    print(shares)
    return counts, shares


# Secondary variables derived from H1RE3 (religious service attendance).
recode1 = {1: 4, 2: 2, 3: 1, 4: 0}
recode2 = {1: 48, 2: 24, 3: 12, 4: 0}
data["USATTD"] = data["H1RE3"].map(recode1)
data["USATTDY"] = data["H1RE3"].map(recode2)
c5, p5 = _frequency_report("USATTDY")

# Secondary variables derived from H1RE6 (prayer).
recode3 = {1: 30, 2: 4, 3: 1, 4: 0}
recode4 = {1: 365, 2: 48, 3: 12, 4: 0}
data["USPRAY"] = data["H1RE6"].map(recode3)
data["USPRAYY"] = data["H1RE6"].map(recode4)
c6, p6 = _frequency_report("USPRAYY")
# Preview of the two yearly secondary variables side by side.
sub3 = data[["USATTDY", "USPRAYY"]]
# head() returns a new frame rather than trimming sub3 in place, so its
# result is what has to be printed to show only the first 25 rows.
print(sub3.head(25))
2. OUTPUT
counts for H1RE3 with 6 and 8 set to nan
nan 894
1.0 2567
2.0 1274
3.0 1105
4.0 664
Name: H1RE3, dtype: int64
counts for H1RE6 with 6,7 and 8 set to nan
nan 890
1.0 2722
2.0 1370
3.0 586
4.0 496
5.0 440
Name: H1RE6, dtype: int64
counts for H1ID1O with 6, 8 and 9 set to nan
nan 124
1.0 2847
2.0 3533
Name: H1ID1O, dtype: int64
counts for H1ID1Q with 6, 8 and 9 set to nan
nan 124
1.0 1489
2.0 4891
Name: H1ID1Q, dtype: int64
counts for USATTDY
nan 894
0.0 664
12.0 1105
24.0 1274
48.0 2567
Name: USATTDY, dtype: int64
percentages for USATTDY
0.0 0.118360
12.0 0.196970
24.0 0.227094
48.0 0.457576
Name: USATTDY, dtype: float64
counts for USPRAYY
nan 1330
0.0 496
12.0 586
48.0 1370
365.0 2722
Name: USPRAYY, dtype: int64
percentages for USPRAYY
0.0 0.095864
12.0 0.113259
48.0 0.264785
365.0 0.526092
Name: USPRAYY, dtype: float64
3. Managing results
I have managed the missing data in all the variables I have chosen, and have coded and recoded the data.
I have created 2 secondary variables, but I did not group values within individual variables or create new columns from them, as that made no sense for my research question.
4. The frequency distributions for variables
52.6% of the respondents pray every day (365 days per year), and 46% attend religious services 48 times per year.
With the data management done so far, it is not yet possible to indicate the correlation between religious service attendance and romantic relationships.
0 notes
Text
Data Management Week2
Program
# Week 2 setup: import the libraries once, load the AddHealth extract and
# report its size.
import pandas
import numpy

data = pandas.read_csv('addhealth_pds.csv', low_memory=False)

# Upper-case the column names so the upper-case variable names used below
# match; a concrete list is assigned rather than a lazy map iterator.
data.columns = [name.upper() for name in data.columns]

print(len(data))          # number of observations (rows)
print(len(data.columns))  # number of variables (columns)
def _show_distribution(column, count_label, share_label):
    """Print the labelled counts and shares of data[column].

    Returns the (counts, shares) pair so the caller can keep both tables.
    """
    print(count_label)
    counts = data[column].value_counts(sort=False)
    print(counts)
    print(share_label)
    shares = data[column].value_counts(sort=False, normalize=True)
    print(shares)
    return counts, shares


# One call per studied variable; the labels are kept exactly as published.
c1, p1 = _show_distribution(
    "H1RE3",
    'counts for H1RE3 frequency religious servises attendance for the past 12 months',
    'counts for H1RE3 frequency religious servises attendance for the past 12 months, percentage',
)
c2, p2 = _show_distribution(
    "H1ID1O",
    'counts for H1ID1O would have sex in ideal romantic relarionship',
    'counts for H1ID1O would have sex in ideal romantic relarionship, percentage',
)
c3, p3 = _show_distribution(
    "H1ID1Q",
    'counts for H1ID1Q would get married in ideal romantic relarionship',
    'counts for H1ID1Q would get married in ideal romantic relarionship, percentage',
)
c4, p4 = _show_distribution(
    "H1RE6",
    'counts for H1RE6 the frequency of prays, 1 once a day, 2 once a week',
    'counts for H1RE6 the frequency of prays, 1 once a day, 2 once a week,percentage',
)
# Same frequency tables via groupby: group sizes, then the sizes expressed as
# a percentage of all rows in the data set.
sample_size = len(data)

ct1, ct2, ct3, ct4 = (
    data.groupby(column).size()
    for column in ('H1RE3', 'H1ID1O', 'H1ID1Q', 'H1RE6')
)
pt1, pt2, pt3, pt4 = (
    sizes * 100 / sample_size for sizes in (ct1, ct2, ct3, ct4)
)

# Each variable's counts are followed directly by its percentages.
for sizes, percentages in ((ct1, pt1), (ct2, pt2), (ct3, pt3), (ct4, pt4)):
    print(sizes)
    print(percentages)
Frequency tables
H1RE3
1    2567
2    1274
3    1105
4     664
6       8
7     879
8       7
dtype: int64
H1RE3
1    39.468020
2    19.587946
3    16.989545
4    10.209102
6     0.123001
7    13.514760
8     0.107626
dtype: float64
H1ID1O
1    2847
2    3533
6      63
8      56
9       5
dtype: int64
H1ID1O
1    43.773063
2    54.320418
6     0.968635
8     0.861009
9     0.076876
dtype: float64
H1ID1Q
1    1489
2    4891
6      63
8      56
9       5
dtype: int64
H1ID1Q
1    22.893604
2    75.199877
6     0.968635
8     0.861009
9     0.076876
dtype: float64
H1RE6
1    2722
2    1370
3     586
4     496
5     440
6       6
7     879
8       5
dtype: int64
H1RE6
1    41.851169
2    21.063961
3     9.009840
4     7.626076
5     6.765068
6     0.092251
7    13.514760
8     0.076876
dtype: float64
Among 6504 respondents, almost 60% attend religious services and pray at least once per week.
43% would have sex in their ideal romantic relationship, and only 22% imagine getting married.
We have almost no missing data among these questions.
0 notes
Text
Coursera. Data Management and Visualization.
Assignment. Week1.
1. I have chosen the following data set:
The National Longitudinal Study of Adolescent Health
(AddHealth) is a representative school-based survey of adolescents in grades 7-12 in the United States.
2. My research question is
Does religion influence the pregnancy norms, ideal romantic relationship and marital timing of adolescents?
3. Hypotheses:
Does the frequency of religious service attendance influence the ideal romantic relationship?
Do adolescents who identify with a religious group tend to have stronger proscriptions regarding aspects of sexuality and family planning?
Do the frequency of praying and participation in churches' special activities for teenagers influence pregnancy norms?
Do religious teenagers tend to have sex with their partners only if they regard them as a future partner?
The more religious the teenager, the earlier the ideal marriage age.
4. Variables and codebook
RELIGION
H1RE1 What is your religion
H1RE2 Do you agree or disagree that the sacred scriptures of your religion are the word of God and are completely without any mistakes?
H1RE3 In the past 12 months, how often did you attend religious services?
H1RE4 How important is religion to you?
H1RE6 How often do you pray?
H1RE7 Many churches, synagogues, and other places of worship have special activities for teenagers—such as youth groups, Bible classes, or choir. In the past 12 months, how often did you attend such youth activities?
PREGNANCY
H1RP1 Getting (If R is male, add: someone) pregnant at this time in your life is one of the worst things that could happen to you.
H1RP2 It wouldn’t be all that bad if you got (IF R is male, add: someone) pregnant at this time in your life.
H1RP3 If you got the AIDS virus, you would suffer a great deal.
H1RP4 It would be a big hassle to do the things necessary to completely protect yourself from getting a sexually transmitted disease.
H1RP5 Imagine that sometime soon you were to have sexual intercourse with someone just once, but were unable to use any method of birth control for some reason. What is the chance that you would get (If R is male, add: your partner) pregnant?
H1RP6 Suppose that sometime soon you had sexual intercourse for a whole month, as often as you wanted to, without using any protection. What is the chance that you would get the AIDS virus?
IDEAL RELATIONSHIP
H1ID1A We would go out together in a group.
H1ID1B I would meet my partner’s parents.
H1ID1C I would tell other people that we were a couple.
H1ID1D I would see less of my other friends so I could spend more time with my partner.
H1ID1E We would go out together alone.
H1ID1F We would hold hands.
H1ID1G I would give my partner a present.
H1ID1H My partner would give me a present.
H1ID1I I would tell my partner that I loved him or her.
H1ID1J My partner would tell me that he or she loved me.
H1ID1K We would think of ourselves as a couple.
H1ID1L We would talk about contraception or sexually transmitted diseases.
H1ID1M We would kiss.
H1ID1N We would touch each other under our clothing or with no clothes on.
H1ID1O We would have sex.
H1ID1P My partner or I would get pregnant.
H1ID1Q We would get married.
5. I found the following research on my topic:
Racial, Socioeconomic, and Religious Influences on School-Level Teen Pregnancy Norms and Behaviors
http://www.academia.edu/2740650/Racial_Socioeconomic_and_Religious_Influences_on_School-Level_Teen_Pregnancy_Norms_and_Behaviors
The Influence of Religion in Adolescence on Adolescents’ Attitude toward Marital Timing
https://repositories.lib.utexas.edu/bitstream/handle/2152/ETD-UT-2012-08-6051/REDFORD-THESIS.pdf?sequence=1
6. Conclusions after the research.
Individuals who identify with a religious group tend to have stronger proscriptions regarding aspects of sexuality (including pornography), family planning, and specific gender roles that may influence group norms regarding teen pregnancy.
The higher maternal religious attendance and salience levels, the less likely their children were to accept nonmarital sex or cohabitation.
Maternal religiosity and affiliation frequently influenced adolescents’ attitude toward marital timing similarly to adolescent religiosity and affiliation.
0 notes