|
@@ -1,59 +1,222 @@
|
|
import pandas as pd
|
|
import pandas as pd
|
|
|
|
+import math
|
|
|
|
+import numpy as np
|
|
|
|
|
|
-def get_smoked():
|
|
|
|
- #获取所有人的出生年月
|
|
|
|
- HHID_list = []
|
|
|
|
- PN_list = []
|
|
|
|
- SMOKED_list = []
|
|
|
|
- #98年加入
|
|
|
|
- with(open("/root/r_base/HRS/1998/data/H98CS_R.DA", "r", encoding="utf-8") )as file:
|
|
|
|
|
|
if __name__ == "__main__":
    # Variables this script sets out to extract for every survey wave:
    #   age, sex, marital status, education, smoking status, drinking status,
    #   physical activity level, body mass index (BMI),
    #   glycated haemoglobin (HbA1c), systolic blood pressure (SBP),
    #   high-density lipoprotein cholesterol (HDL-C), C-reactive protein

    # ---- 1992 wave ----
    # One list per output column; all lists grow in lockstep, one entry per
    # fixed-width record of the input file.
    columns_1992 = (
        "HHID",
        "PN",
        "BIRTH_YEAR",
        "SEX",
        "MARITAL_STATUS",
        "EDUCATION",
        "SMOKED",
        "DRINKED",
        "PHYSICAL_ACTIVITY_LEVEL",
        "BMI",
        "HEART_PROBL",
        "STROKE",
    )
    data = {column: [] for column in columns_1992}
    with open("/root/r_base/HRS/1992/data/health.da", "r", encoding="utf-8") as file:
        # Read the file record by record.
        for line in file:
            HHID = line[0:6]
            PN = line[6:9]
            BIRTH_YEAR = line[249:254]
            # Sex: 1 = male, 2 = female.
            SEX = line[109:110]
            # Raw marital status codes:
            #   1 married, 2 partner, 3 separated, 4 divorced, 5 widowed,
            #   6 never married,
            #   7 married with 2 family residences, both sampleable,
            #   8 married with 2 family residences, one not sampleable
            #     (institution or out of the country),
            #   9 NA.
            # Collapsed to: "1" married or partnered, "5" other, NaN for NA.
            marital_code = line[302:303]
            if marital_code in ("1", "2", "7", "8"):
                MARITAL_STATUS = "1"
            elif marital_code == "9":
                MARITAL_STATUS = np.nan
            else:
                MARITAL_STATUS = "5"
            # Education: 0 no formal education, 1-11 grades, 12 high school,
            # 13-15 some college, 16 college graduate,
            # 17 post college (17+ years), 97 other.
            EDUCATION = line[264:266]
            # Smoking: 1 yes, 5 no.
            SMOKED = line[519:520]
            # Drinking: 1 yes, 5 no.
            DRINKED = line[527:528]
            # Physical activity level:
            #   3 vigorous, 2 moderate, 1 inactive (everything else).
            # Raw codes "1" and "2" in either column count as active.
            VIGOROUS_PHYSICAL = line[534:535]
            LIGHT_PHYSICAL = line[533:534]
            if VIGOROUS_PHYSICAL in ("1", "2"):
                PHYSICAL_ACTIVITY_LEVEL = 3
            elif LIGHT_PHYSICAL in ("1", "2"):
                PHYSICAL_ACTIVITY_LEVEL = 2
            else:
                PHYSICAL_ACTIVITY_LEVEL = 1
            # Weight is multiplied by the pounds-to-kilograms factor.
            WEIGH = 0.45359237 * float(line[536:539])
            # Height: first column scaled by the feet-to-metres factor, next
            # two by the inches-to-metres factor.
            feet_part = float(line[542:543]) * 0.3048
            inch_part = float(line[543:545]) * 0.0254
            HIGHT = feet_part + inch_part
            BMI = WEIGH / math.pow(HIGHT, 2)
            # Heart attack, coronary heart disease, angina, congestive heart
            # failure or other heart problems: 1 yes, 5 no.
            HEART_PROBL = line[459:460]
            # Stroke.
            STROKE = line[473:474]

            record = (
                HHID,
                PN,
                BIRTH_YEAR,
                SEX,
                MARITAL_STATUS,
                EDUCATION,
                SMOKED,
                DRINKED,
                PHYSICAL_ACTIVITY_LEVEL,
                BMI,
                HEART_PROBL,
                STROKE,
            )
            for column, value in zip(columns_1992, record):
                data[column].append(value)

    # The scalar is broadcast to every row when the frame is built.
    data["WAVE"] = 1992
    result = pd.DataFrame(data)
|
|
|
|
+ # 获取1993数据
|
|
|
|
+ with(open("/root/r_base/HRS/1993/data/BR21.DA", "r", encoding="utf-8") )as file:
|
|
|
|
+ HHID_list = []
|
|
|
|
+ PN_list = []
|
|
|
|
+ BIRTH_YEAR_list = []
|
|
|
|
+ SEX_list = []
|
|
|
|
+ MARITAL_STATUS_list = []
|
|
|
|
+ EDUCATION_list = []
|
|
|
|
+ SMOKED_list = []
|
|
|
|
+ DRINKED_list = []
|
|
|
|
+ PHYSICAL_ACTIVITY_LEVEL_list = []
|
|
|
|
+ BMI_list = []
|
|
|
|
+ HEART_PROBL_list = []
|
|
|
|
+ STROKE_list=[]
|
|
# 逐行读取文件
|
|
# 逐行读取文件
|
|
for line in file:
|
|
for line in file:
|
|
HHID = line[0:6]
|
|
HHID = line[0:6]
|
|
PN = line[6:9]
|
|
PN = line[6:9]
|
|
- BORN_YEAR = line[25:29]
|
|
|
|
- SEX = line[20:21] #1.Male 2.Female
|
|
|
|
|
|
+ BIRTH_YEAR = line[61:65]
|
|
|
|
+ # 1.Male
|
|
|
|
+ # 2.Female
|
|
|
|
+ SEX = line[16:17]
|
|
|
|
+ # MARRIED, SPOUSE PRESENT........... 1
|
|
|
|
+ # MARRIED, SPOUSE ABSENT............ 2
|
|
|
|
+ # LIVING WITH SOMEONE............... 3 GO TO A11b
|
|
|
|
+ # DIVORCED/SEPARATED................ 4 GO TO A11g
|
|
|
|
+ # WIDOWED........................... 5 GO TO A11g
|
|
|
|
+ # NEVER MARRIED..................... 6 GO TO B1
|
|
|
|
+ MARITAL_STATUS = line[98:99]
|
|
|
|
+ # 1 Married or Partner; 5 other
|
|
|
|
+ MARITAL_STATUS = "1" if MARITAL_STATUS=="1" or MARITAL_STATUS=="2" or MARITAL_STATUS=="3" else "5"
|
|
|
|
+ # 0 For no formal education
|
|
|
|
+ # 1-11 .....Grades
|
|
|
|
+ # 12 .......High school
|
|
|
|
+ # 13-15 ....Some college
|
|
|
|
+ # 16 .......College grad
|
|
|
|
+ # 17 .......Post college (17+ years)
|
|
|
|
+ # 97 .......Other
|
|
|
|
+ EDUCATION = line[74:76]
|
|
|
|
+ #CURRENT SMOKER..................... 1
|
|
|
|
+ # FORMER SMOKER...................... 2 GO TO B20
|
|
|
|
+ # NEVER SMOKED....................... 3 GO TO B20
|
|
|
|
+ # 1. Yes
|
|
|
|
+ # 5. No [Inap in V502-V505]
|
|
|
|
+ SMOKED = "1" if line[172:173] == "1" or line[172:173] == "2" else "5"
|
|
|
|
+ # 1. Yes
|
|
|
|
+ # 5. No [Inap in V507]
|
|
|
|
+ DRINKED = line[176:177]
|
|
|
|
+ # 3. vigorous (vigorous activity more than once a week)
|
|
|
|
+ # 2. moderate (moderate activity more than once a week)
|
|
|
|
+ # 1. inactive (the rest)
|
|
|
|
+ # 重度活动
|
|
|
|
+ VIGOROUS_PHYSICAL = np.nan
|
|
|
|
+ # 轻度活动
|
|
|
|
+ LIGHT_PHYSICAL = np.nan
|
|
|
|
+ PHYSICAL_ACTIVITY_LEVEL = np.nan
|
|
|
|
+ # 体重
|
|
|
|
+ WEIGH= float(line[179:182])*0.45359237
|
|
|
|
+ # 身高
|
|
|
|
+ HEIGHT = float(line[182:184])*0.0254
|
|
|
|
+ # BMI
|
|
|
|
+ BMI = WEIGH / math.pow(HEIGHT,2)
|
|
|
|
+ # A heart attack, coronary heart disease, angina, congestive heart failure, or other heart problems
|
|
|
|
+ # 1. Yes
|
|
|
|
+ # 5. No [Inap in V418-V421]
|
|
|
|
+ HEART_PROBL = line[139:140]
|
|
|
|
+ # STROKE
|
|
|
|
+ STROKE = line[142:143]
|
|
HHID_list.append(HHID)
|
|
HHID_list.append(HHID)
|
|
PN_list.append(PN)
|
|
PN_list.append(PN)
|
|
- BORN_YEAR_list.append(BORN_YEAR)
|
|
|
|
|
|
+ BIRTH_YEAR_list.append(BIRTH_YEAR)
|
|
SEX_list.append(SEX)
|
|
SEX_list.append(SEX)
|
|
- print(line, end='') # end='' 用来避免多余的换行
|
|
|
|
- data = {
|
|
|
|
- "HHID":HHID_list,
|
|
|
|
- "PN":PN_list,
|
|
|
|
- "BORN_YEAR":BORN_YEAR_list,
|
|
|
|
- "SEX":SEX_list
|
|
|
|
- }
|
|
|
|
- result = pd.DataFrame(data)
|
|
|
|
- print(result.info())
|
|
|
|
- #进行去重处理
|
|
|
|
- result.drop_duplicates(subset=["HHID","PN"], keep="last", inplace=True)
|
|
|
|
- print(result.info())
|
|
|
|
- result.to_csv("/root/r_base/HRS/result_born_sex.csv", encoding="utf-8", index=False)
|
|
|
|
-
|
|
|
|
-if __name__ == "__main__":
|
|
|
|
- # result_born_sex = pd.read_csv("/root/r_base/HRS/result_born_sex.csv", encoding="utf-8")
|
|
|
|
- #2006数据
|
|
|
|
- year = "20"
|
|
|
|
- wave = "06"
|
|
|
|
- # 获取06年之前加入人员的信息
|
|
|
|
- with(open("/root/r_base/HRS/2006/data/H06PR_R.da", "r", encoding="utf-8") )as file:
|
|
|
|
|
|
+ MARITAL_STATUS_list.append(MARITAL_STATUS)
|
|
|
|
+ EDUCATION_list.append(EDUCATION)
|
|
|
|
+ SMOKED_list.append(SMOKED)
|
|
|
|
+ DRINKED_list.append(DRINKED)
|
|
|
|
+ PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
|
|
|
|
+ BMI_list.append(BMI)
|
|
|
|
+ HEART_PROBL_list.append(HEART_PROBL)
|
|
|
|
+ STROKE_list.append(STROKE)
|
|
|
|
+ data = {
|
|
|
|
+ "HHID":HHID_list,
|
|
|
|
+ "PN":PN_list,
|
|
|
|
+ "BIRTH_YEAR":BIRTH_YEAR_list,
|
|
|
|
+ "SEX":SEX_list,
|
|
|
|
+ "MARITAL_STATUS":MARITAL_STATUS_list,
|
|
|
|
+ "EDUCATION":EDUCATION_list,
|
|
|
|
+ "SMOKED":SMOKED_list,
|
|
|
|
+ "DRINKED":DRINKED_list,
|
|
|
|
+ "PHYSICAL_ACTIVITY_LEVEL":PHYSICAL_ACTIVITY_LEVEL_list,
|
|
|
|
+ "BMI":BMI_list,
|
|
|
|
+ "HEART_PROBL":HEART_PROBL_list,
|
|
|
|
+ "STROKE":STROKE_list
|
|
|
|
+ }
|
|
|
|
+ data["WAVE"] = 1993
|
|
|
|
+ result_1993 = pd.DataFrame(data)
|
|
|
|
+ result = pd.concat([result, result_1993], axis=0)
|
|
|
|
+ # 获取1994数据
|
|
|
|
+ with(open("/root/r_base/HRS/1994/data/W2a.da", "r", encoding="utf-8") )as file:
|
|
HHID_list = []
|
|
HHID_list = []
|
|
PN_list = []
|
|
PN_list = []
|
|
- BORN_YEAR_list = []
|
|
|
|
|
|
+ BIRTH_YEAR_list = []
|
|
SEX_list = []
|
|
SEX_list = []
|
|
MARITAL_STATUS_list = []
|
|
MARITAL_STATUS_list = []
|
|
EDUCATION_list = []
|
|
EDUCATION_list = []
|
|
@@ -61,18 +224,22 @@ if __name__ == "__main__":
|
|
for line in file:
|
|
for line in file:
|
|
HHID = line[0:6]
|
|
HHID = line[0:6]
|
|
PN = line[6:9]
|
|
PN = line[6:9]
|
|
- BORN_YEAR = line[25:29]
|
|
|
|
|
|
+ BIRTH_YEAR = line[26:30]
|
|
# 1.Male
|
|
# 1.Male
|
|
# 2.Female
|
|
# 2.Female
|
|
- SEX = line[20:21]
|
|
|
|
- # 0. UNKNOWN
|
|
|
|
- # 1. MARRIED
|
|
|
|
- # 2. MARRIED SP ABSENT (IN INSTITUTION)
|
|
|
|
- # 3. MARRIED SP ABSENT (NOT IN INSTITUTION)
|
|
|
|
- # 4. DIVORCED/SEPARATED
|
|
|
|
- # 5. WIDOWED
|
|
|
|
- # 6. NEVER MARRIED
|
|
|
|
- MARITAL_STATUS = line[106:107]
|
|
|
|
|
|
+ SEX = line[22:23]
|
|
|
|
+ # 1. Married [Inap in V228-V238]
|
|
|
|
+ # 2. Partner [Inap in V226-V227]
|
|
|
|
+ # 3. Separated [Inap in V226-V234]
|
|
|
|
+ # 4. Divorced [Inap in V226-V234]
|
|
|
|
+ # 5. Widowed [Inap in V226-V234]
|
|
|
|
+ # 6. Never married
|
|
|
|
+ # 7. Married (Not Institutionalized/not out of country)
|
|
|
|
+ # 8. Married (Institutionalized/out of country)
|
|
|
|
+ # 9. NA
|
|
|
|
+ MARITAL_STATUS = line[55:57]
|
|
|
|
+ # 1 Married or Partner; 5 other
|
|
|
|
+ MARITAL_STATUS = "1" if MARITAL_STATUS=="1" or MARITAL_STATUS=="2" or MARITAL_STATUS=="7" or MARITAL_STATUS=="8" else "5" if not MARITAL_STATUS=="9" else np.nan
|
|
# 0 For no formal education
|
|
# 0 For no formal education
|
|
# 1-11 .....Grades
|
|
# 1-11 .....Grades
|
|
# 12 .......High school
|
|
# 12 .......High school
|
|
@@ -80,51 +247,2543 @@ if __name__ == "__main__":
|
|
# 16 .......College grad
|
|
# 16 .......College grad
|
|
# 17 .......Post college (17+ years)
|
|
# 17 .......Post college (17+ years)
|
|
# 97 .......Other
|
|
# 97 .......Other
|
|
- EDUCATION = line[585:587]
|
|
|
|
|
|
+ # 98. Don't Know; DK
|
|
|
|
+ # 99. Not Ascertained; NA
|
|
|
|
+ EDUCATION = line[112:115]
|
|
HHID_list.append(HHID)
|
|
HHID_list.append(HHID)
|
|
PN_list.append(PN)
|
|
PN_list.append(PN)
|
|
- BORN_YEAR_list.append(BORN_YEAR)
|
|
|
|
|
|
+ BIRTH_YEAR_list.append(BIRTH_YEAR)
|
|
SEX_list.append(SEX)
|
|
SEX_list.append(SEX)
|
|
MARITAL_STATUS_list.append(MARITAL_STATUS)
|
|
MARITAL_STATUS_list.append(MARITAL_STATUS)
|
|
EDUCATION_list.append(EDUCATION)
|
|
EDUCATION_list.append(EDUCATION)
|
|
- print(line, end='') # end='' 用来避免多余的换行
|
|
|
|
data = {
|
|
data = {
|
|
"HHID":HHID_list,
|
|
"HHID":HHID_list,
|
|
"PN":PN_list,
|
|
"PN":PN_list,
|
|
- "BORN_YEAR":BORN_YEAR_list,
|
|
|
|
|
|
+ "BIRTH_YEAR":BIRTH_YEAR_list,
|
|
"SEX":SEX_list,
|
|
"SEX":SEX_list,
|
|
"MARITAL_STATUS":MARITAL_STATUS_list,
|
|
"MARITAL_STATUS":MARITAL_STATUS_list,
|
|
- "EDUCATION":EDUCATION_list
|
|
|
|
|
|
+ "EDUCATION":EDUCATION_list,
|
|
}
|
|
}
|
|
- data["WAVE"] = 2006
|
|
|
|
- result = pd.DataFrame(data)
|
|
|
|
- # 将06年新加入的人员合并入数据
|
|
|
|
- with(open("/root/r_base/HRS/2006/data/H06C_R.da", "r", encoding="utf-8") )as file:
|
|
|
|
|
|
+ data["WAVE"] = 1994
|
|
|
|
+ result_1994_one = pd.DataFrame(data)
|
|
|
|
+ with(open("/root/r_base/HRS/1994/data/W2B.DA", "r", encoding="utf-8") )as file:
|
|
HHID_list = []
|
|
HHID_list = []
|
|
PN_list = []
|
|
PN_list = []
|
|
- DRINK_list = []
|
|
|
|
|
|
+ SMOKED_list = []
|
|
|
|
+ DRINKED_list = []
|
|
|
|
+ PHYSICAL_ACTIVITY_LEVEL_list = []
|
|
|
|
+ BMI_list = []
|
|
|
|
+ HEART_PROBL_list = []
|
|
|
|
+ STROKE_list=[]
|
|
# 逐行读取文件
|
|
# 逐行读取文件
|
|
for line in file:
|
|
for line in file:
|
|
HHID = line[0:6]
|
|
HHID = line[0:6]
|
|
PN = line[6:9]
|
|
PN = line[6:9]
|
|
- #饮酒
|
|
|
|
- # 1. YES
|
|
|
|
- # 3. [VOL] NEVER HAVE USED ALCOHOL
|
|
|
|
- # 5. NO
|
|
|
|
- # 8. DK (Don't Know); NA (Not Ascertained)
|
|
|
|
- # 9. RF (Refused)
|
|
|
|
- # Blank. INAP (Inapplicable); Partial Interview
|
|
|
|
- DRINK = line[207:208]
|
|
|
|
-
|
|
|
|
|
|
+ #1. Yes
|
|
|
|
+ #5. No [GO TO B41]
|
|
|
|
+ #8. Don't Know; DK [GO TO B41]
|
|
|
|
+ #9. Refused; RF [GO TO B41]
|
|
|
|
+ #0. Inap.
|
|
|
|
+ # Proxy interview for deceased Wave-1 R
|
|
|
|
+ SMOKED = line[356:358]
|
|
|
|
+ #1. Yes
|
|
|
|
+ #5. No [GO TO B41]
|
|
|
|
+ #8. Don't Know; DK [GO TO B41]
|
|
|
|
+ #9. Refused; RF [GO TO B41]
|
|
|
|
+ #0. Inap.
|
|
|
|
+ # Proxy interview for deceased Wave-1 R
|
|
|
|
+ DRINKED = line[367:369]
|
|
|
|
+ # 3. vigorous (vigorous activity more than once a week)
|
|
|
|
+ # 2. moderate (moderate activity more than once a week)
|
|
|
|
+ # 1. inactive (the rest)
|
|
|
|
+ # 活动单位
|
|
|
|
+ # 02. Week
|
|
|
|
+ # 04. Month
|
|
|
|
+ # 06. Year
|
|
|
|
+ # 07. Other (specify)
|
|
|
|
+ # 11. Day
|
|
|
|
+ # 98. Don't Know/Not Ascertained; DK/NA
|
|
|
|
+ # 99. Refused; RF
|
|
|
|
+ # 00. Inap.
|
|
|
|
+ # Proxy interview for deceased Wave-1 R
|
|
|
|
+ # [B42a: or B42=995-999]
|
|
|
|
+ # [B43a: or B43=995-999]
|
|
|
|
+ # 重度活动
|
|
|
|
+ VIGOROUS_PHYSICAL = line[378:382]
|
|
|
|
+ VIGOROUS_UNIT = line[382:385]
|
|
|
|
+ VIGOROUS_PHYSICAL_FLAG = np.nan
|
|
|
|
+ if VIGOROUS_UNIT == "02" and float(VIGOROUS_PHYSICAL)>0 and not VIGOROUS_PHYSICAL=="994" and not float(VIGOROUS_PHYSICAL)>995:
|
|
|
|
+ VIGOROUS_PHYSICAL_FLAG = True
|
|
|
|
+ if VIGOROUS_UNIT == "04" and float(VIGOROUS_PHYSICAL)>3 and not VIGOROUS_PHYSICAL=="994" and not float(VIGOROUS_PHYSICAL)>995:
|
|
|
|
+ VIGOROUS_PHYSICAL_FLAG = True
|
|
|
|
+ if VIGOROUS_UNIT == "06" and float(VIGOROUS_PHYSICAL)>51 and not VIGOROUS_PHYSICAL=="994" and not float(VIGOROUS_PHYSICAL)>995:
|
|
|
|
+ VIGOROUS_PHYSICAL_FLAG = True
|
|
|
|
+ if VIGOROUS_PHYSICAL=="00":
|
|
|
|
+ VIGOROUS_PHYSICAL_FLAG = False
|
|
|
|
+ # 轻度活动
|
|
|
|
+ LIGHT_PHYSICAL = line[371:375]
|
|
|
|
+ LIGHT_UNIT = line[375:378]
|
|
|
|
+ # 判断是否符合轻运动 1符合;0不符合
|
|
|
|
+ LIGHT_PHYSICAL_FLAG = np.nan
|
|
|
|
+ if LIGHT_UNIT == "02" and float(LIGHT_PHYSICAL)>0 and not LIGHT_PHYSICAL=="994" and not float(LIGHT_PHYSICAL)>995:
|
|
|
|
+ LIGHT_PHYSICAL_FLAG = True
|
|
|
|
+ if LIGHT_UNIT == "04" and float(LIGHT_PHYSICAL)>3 and not LIGHT_PHYSICAL=="994" and not float(LIGHT_PHYSICAL)>995:
|
|
|
|
+ LIGHT_PHYSICAL_FLAG = True
|
|
|
|
+ if LIGHT_UNIT == "06" and float(LIGHT_PHYSICAL)>51 and not LIGHT_PHYSICAL=="994" and not float(LIGHT_PHYSICAL)>995:
|
|
|
|
+ LIGHT_PHYSICAL_FLAG = True
|
|
|
|
+ if LIGHT_PHYSICAL=="00":
|
|
|
|
+ LIGHT_PHYSICAL_FLAG = False
|
|
|
|
+ PHYSICAL_ACTIVITY_LEVEL = 3 if VIGOROUS_PHYSICAL_FLAG == True else 2 if LIGHT_PHYSICAL_FLAG==True else 1 if LIGHT_PHYSICAL_FLAG==False or VIGOROUS_PHYSICAL_FLAG==False else np.nan
|
|
|
|
+ # 体重
|
|
|
|
+ WEIGH= float(line[385:389])*0.45359237 if not float(line[385:389])>500 else np.nan
|
|
|
|
+ # 身高
|
|
|
|
+ HIGHT = float(line[389:392])*0.3048 + float(line[392:395])*0.0254 if not float(line[389:392])>95 and not float(line[392:395])>95 else np.nan
|
|
|
|
+ # BMI
|
|
|
|
+ BMI = WEIGH / math.pow(HIGHT,2) if not np.isnan(WEIGH) and not np.isnan(HIGHT) else np.nan
|
|
|
|
+ # A heart attack, coronary heart disease, angina, congestive heart failure, or other heart problems
|
|
|
|
+ # 1. Yes
|
|
|
|
+ # 5. No [Inap in V418-V421]
|
|
|
|
+ HEART_PROBL = line[147:149] if not line[147:149]=="8" and not line[147:149]=="9" and not line[147:149]=="0" else np.nan
|
|
|
|
+ # STROKE
|
|
|
|
+ STROKE = line[173:175] if not line[173:175]=="8" and not line[173:175]=="9" and not line[173:175]=="0" else np.nan
|
|
HHID_list.append(HHID)
|
|
HHID_list.append(HHID)
|
|
PN_list.append(PN)
|
|
PN_list.append(PN)
|
|
- DRINK_list.append(DRINK)
|
|
|
|
- print(line, end='') # end='' 用来避免多余的换行
|
|
|
|
|
|
+ SMOKED_list.append(SMOKED)
|
|
|
|
+ DRINKED_list.append(DRINKED)
|
|
|
|
+ PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
|
|
|
|
+ BMI_list.append(BMI)
|
|
|
|
+ HEART_PROBL_list.append(HEART_PROBL)
|
|
|
|
+ STROKE_list.append(STROKE)
|
|
data = {
|
|
data = {
|
|
"HHID":HHID_list,
|
|
"HHID":HHID_list,
|
|
"PN":PN_list,
|
|
"PN":PN_list,
|
|
- "EDUCATION":EDUCATION_list
|
|
|
|
|
|
+ "SMOKED":SMOKED_list,
|
|
|
|
+ "DRINKED":DRINKED_list,
|
|
|
|
+ "PHYSICAL_ACTIVITY_LEVEL":PHYSICAL_ACTIVITY_LEVEL_list,
|
|
|
|
+ "BMI":BMI_list,
|
|
|
|
+ "HEART_PROBL":HEART_PROBL_list,
|
|
|
|
+ "STROKE":STROKE_list
|
|
}
|
|
}
|
|
- result = pd.DataFrame(data)
|
|
|
|
- print(result.info())
|
|
|
|
|
|
+ result_1994_two = pd.DataFrame(data)
|
|
|
|
+ result_1994 = pd.merge(result_1994_one, result_1994_two, on=["HHID", "PN"], how="left")
|
|
|
|
+ result = pd.concat([result, result_1994], axis=0)
|
|
|
|
+ print(result.head())
|
|
|
|
+ # 获取1995数据
|
|
|
|
+ with(open("/root/r_base/HRS/1995/data/A95A_R.DA", "r", encoding="utf-8") )as file:
|
|
|
|
+ HHID_list = []
|
|
|
|
+ PN_list = []
|
|
|
|
+ BIRTH_YEAR_list = []
|
|
|
|
+ MARITAL_STATUS_list = []
|
|
|
|
+ EDUCATION_list = []
|
|
|
|
+ # 逐行读取文件
|
|
|
|
+ for line in file:
|
|
|
|
+ HHID = line[0:6]
|
|
|
|
+ PN = line[6:9]
|
|
|
|
+ BIRTH_YEAR = line[30:34]
|
|
|
|
+ # MARRIED, SPOUSE PRESENT........... 1
|
|
|
|
+ # MARRIED, SPOUSE ABSENT............ 2
|
|
|
|
+ # LIVING WITH SOMEONE............... 3 GO TO A11b
|
|
|
|
+ # DIVORCED/SEPARATED................ 4 GO TO A11g
|
|
|
|
+ # WIDOWED........................... 5 GO TO A11g
|
|
|
|
+ # NEVER MARRIED..................... 6 GO TO B1
|
|
|
|
+ # 0. Exit proxy was taken before the interview with the surviving spouse.
|
|
|
|
+ # 7. MARRIED, SPOUSE ABSENT (NOT INSTITUTION)
|
|
|
|
+ MARITAL_STATUS = line[76:77]
|
|
|
|
+ # 1 Married or Partner; 5 other
|
|
|
|
+ MARITAL_STATUS = "1" if MARITAL_STATUS=="1" or MARITAL_STATUS=="2" or MARITAL_STATUS=="3" else np.nan if MARITAL_STATUS=="0" or MARITAL_STATUS=="7" else "5"
|
|
|
|
+ # 0 For no formal education
|
|
|
|
+ # 1-11 .....Grades
|
|
|
|
+ # 12 .......High school
|
|
|
|
+ # 13-15 ....Some college
|
|
|
|
+ # 16 .......College grad
|
|
|
|
+ # 17 .......Post college (17+ years)
|
|
|
|
+ # 97 .......Other
|
|
|
|
+ # 98. DK (don't know); NA (not ascertained)
|
|
|
|
+ # 99. RF (refused)
|
|
|
|
+ EDUCATION = np.nan if line[49:51] == "97" or line[49:51] == "98" or line[49:51] == "99" else line[49:51]
|
|
|
|
+ HHID_list.append(HHID)
|
|
|
|
+ PN_list.append(PN)
|
|
|
|
+ BIRTH_YEAR_list.append(BIRTH_YEAR)
|
|
|
|
+ MARITAL_STATUS_list.append(MARITAL_STATUS)
|
|
|
|
+ EDUCATION_list.append(EDUCATION)
|
|
|
|
+ data = {
|
|
|
|
+ "HHID":HHID_list,
|
|
|
|
+ "PN":PN_list,
|
|
|
|
+ "BIRTH_YEAR":BIRTH_YEAR_list,
|
|
|
|
+ "MARITAL_STATUS":MARITAL_STATUS_list,
|
|
|
|
+ "EDUCATION":EDUCATION_list,
|
|
|
|
+ }
|
|
|
|
+ data["WAVE"] = 1995
|
|
|
|
+ result_1995_one = pd.DataFrame(data)
|
|
|
|
+ with(open("/root/r_base/HRS/1995/data/A95CS_R.DA", "r", encoding="utf-8") )as file:
|
|
|
|
+ HHID_list = []
|
|
|
|
+ PN_list = []
|
|
|
|
+ SEX_list = []
|
|
|
|
+ # 逐行读取文件
|
|
|
|
+ for line in file:
|
|
|
|
+ HHID = line[0:6]
|
|
|
|
+ PN = line[6:9]
|
|
|
|
+ # 1.Male
|
|
|
|
+ # 2.Female
|
|
|
|
+ SEX = line[36:37] if not line[36:37]=="0" else np.nan
|
|
|
|
+ HHID_list.append(HHID)
|
|
|
|
+ PN_list.append(PN)
|
|
|
|
+ SEX_list.append(SEX)
|
|
|
|
+ data = {
|
|
|
|
+ "HHID":HHID_list,
|
|
|
|
+ "PN":PN_list,
|
|
|
|
+ "SEX":SEX_list,
|
|
|
|
+ }
|
|
|
|
+ result_1995_two = pd.DataFrame(data)
|
|
|
|
+ result_1995 = pd.merge(result_1995_one, result_1995_two, on=["HHID", "PN"], how="left")
|
|
|
|
+ with(open("/root/r_base/HRS/1995/data/A95B_R.DA", "r", encoding="utf-8") )as file:
|
|
|
|
+ HHID_list = []
|
|
|
|
+ PN_list = []
|
|
|
|
+ SMOKED_list = []
|
|
|
|
+ DRINKED_list = []
|
|
|
|
+ PHYSICAL_ACTIVITY_LEVEL_list = []
|
|
|
|
+ BMI_list = []
|
|
|
|
+ HEART_PROBL_list = []
|
|
|
|
+ STROKE_list=[]
|
|
|
|
+ # 逐行读取文件
|
|
|
|
+ for line in file:
|
|
|
|
+ HHID = line[0:6]
|
|
|
|
+ PN = line[6:9]
|
|
|
|
+ # 1. YES
|
|
|
|
+ # 2. Cigars
|
|
|
|
+ # 5. NO
|
|
|
|
+ # 7. Other
|
|
|
|
+ # 8. DK (don't know); NA (not ascertained)
|
|
|
|
+ # 9. RF (refused)
|
|
|
|
+ # 合并后
|
|
|
|
+ # 1. Yes
|
|
|
|
+ # 5. No [Inap in V502-V505]
|
|
|
|
+ SMOKED = "1" if line[153:154] == "1" or line[153:154] == "2" else "5" if line[153:154] == "5" else np.nan
|
|
|
|
+ # 1. YES
|
|
|
|
+ # 3. [VOL] NEVER HAVE USED ALCOHOL
|
|
|
|
+ # 5. NO
|
|
|
|
+ # 7. Other
|
|
|
|
+ # 8. DK (don't know); NA (not ascertained)
|
|
|
|
+ # 9. RF (refused)
|
|
|
|
+ # 合并后
|
|
|
|
+ # 1. Yes
|
|
|
|
+ # 5. No [Inap in V502-V505]
|
|
|
|
+ DRINKED = "1" if line[157:158] == "1" else "5" if line[157:158] == "5" or line[157:158] == "3" else np.nan
|
|
|
|
+ # 3. vigorous (vigorous activity more than once a week)
|
|
|
|
+ # 2. moderate (moderate activity more than once a week)
|
|
|
|
+ # 1. inactive (the rest)
|
|
|
|
+ # 重度活动
|
|
|
|
+ VIGOROUS_PHYSICAL = np.nan
|
|
|
|
+ # 轻度活动
|
|
|
|
+ LIGHT_PHYSICAL = np.nan
|
|
|
|
+ PHYSICAL_ACTIVITY_LEVEL = np.nan
|
|
|
|
+ # 体重
|
|
|
|
+ WEIGH= float(line[164:167])*0.45359237 if not float(line[164:167])>400 else np.nan
|
|
|
|
+ # 身高
|
|
|
|
+ HIGHT = line[168:169]
|
|
|
|
+ if not line[168:169] == " " and not line[168:169] == "8" and not line[168:169] == "9" and not pd.isna(WEIGH):
|
|
|
|
+ HIGHT = float(line[168:169])*0.3048 + float(line[169:171])*0.0254
|
|
|
|
+ # BMI
|
|
|
|
+ BMI = WEIGH / math.pow(HEIGHT,2)
|
|
|
|
+ else :
|
|
|
|
+ BMI = np.nan
|
|
|
|
+ # 1. YES
|
|
|
|
+ # 3. [VOL] DISPUTES W1 RECORD
|
|
|
|
+ # 5. NO
|
|
|
|
+ # 7. Other
|
|
|
|
+ # 8. DK (don't know); NA (not ascertained)
|
|
|
|
+ # 9. RF (refused)
|
|
|
|
|
|
|
|
+ # A heart attack, coronary heart disease, angina, congestive heart failure, or other heart problems
|
|
|
|
+ # 1. Yes
|
|
|
|
+ # 3. [VOL] DISPUTES W1 RECORD
|
|
|
|
+ # 5. No [Inap in V418-V421]
|
|
|
|
+ HEART_PROBL = np.nan if line[63:64] == "8" else line[63:64]
|
|
|
|
+ # STROKE
|
|
|
|
+ # 1. Yes
|
|
|
|
+ # 3. [VOL] DISPUTES W1 RECORD
|
|
|
|
+ # 5. No [Inap in V418-V421]
|
|
|
|
+ STROKE = np.nan if line[84:85] == "8" else "5" if line[84:85] == "2" or line[84:85] == "5" else line[84:85]
|
|
|
|
+ HHID_list.append(HHID)
|
|
|
|
+ PN_list.append(PN)
|
|
|
|
+ SMOKED_list.append(SMOKED)
|
|
|
|
+ DRINKED_list.append(DRINKED)
|
|
|
|
+ PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
|
|
|
|
+ BMI_list.append(BMI)
|
|
|
|
+ HEART_PROBL_list.append(HEART_PROBL)
|
|
|
|
+ STROKE_list.append(STROKE)
|
|
|
|
+ data = {
|
|
|
|
+ "HHID":HHID_list,
|
|
|
|
+ "PN":PN_list,
|
|
|
|
+ "SMOKED":SMOKED_list,
|
|
|
|
+ "DRINKED":DRINKED_list,
|
|
|
|
+ "PHYSICAL_ACTIVITY_LEVEL":PHYSICAL_ACTIVITY_LEVEL_list,
|
|
|
|
+ "BMI":BMI_list,
|
|
|
|
+ "HEART_PROBL":HEART_PROBL_list,
|
|
|
|
+ "STROKE":STROKE_list
|
|
|
|
+ }
|
|
|
|
+ result_1995_three = pd.DataFrame(data)
|
|
|
|
+ result_1995 = pd.merge(result_1995, result_1995_three, on=["HHID", "PN"], how="left")
|
|
|
|
+ result = pd.concat([result, result_1995], axis=0)
|
|
|
|
+ # 获取1996数据
|
|
|
|
+ with(open("/root/r_base/HRS/1996/data/H96A_R.DA", "r", encoding="utf-8") )as file:
|
|
|
|
+ HHID_list = []
|
|
|
|
+ PN_list = []
|
|
|
|
+ BIRTH_YEAR_list = []
|
|
|
|
+ MARITAL_STATUS_list = []
|
|
|
|
+ EDUCATION_list = []
|
|
|
|
+ # 逐行读取文件
|
|
|
|
+ for line in file:
|
|
|
|
+ HHID = line[0:6]
|
|
|
|
+ PN = line[6:9]
|
|
|
|
+ BIRTH_YEAR = line[25:29]
|
|
|
|
+ # MARRIED, SPOUSE PRESENT........... 1
|
|
|
|
+ # MARRIED, SPOUSE ABSENT............ 2
|
|
|
|
+ # LIVING WITH SOMEONE............... 3 GO TO A11b
|
|
|
|
+ # DIVORCED/SEPARATED................ 4 GO TO A11g
|
|
|
|
+ # WIDOWED........................... 5 GO TO A11g
|
|
|
|
+ # NEVER MARRIED..................... 6 GO TO B1
|
|
|
|
+ # 7. MARRIED, SPOUSE ABSENT (NOT INSTITUTION)
|
|
|
|
+ MARITAL_STATUS = line[70:71]
|
|
|
|
+ # 1 Married or Partner; 5 other
|
|
|
|
+ MARITAL_STATUS = "1" if MARITAL_STATUS=="1" or MARITAL_STATUS=="2" or MARITAL_STATUS=="3" else "5" if MARITAL_STATUS=="4" or MARITAL_STATUS=="5" or MARITAL_STATUS=="6" or MARITAL_STATUS=="7" else np.nan
|
|
|
|
+ # 0 For no formal education
|
|
|
|
+ # 1-11 .....Grades
|
|
|
|
+ # 12 .......High school
|
|
|
|
+ # 13-15 ....Some college
|
|
|
|
+ # 16 .......College grad
|
|
|
|
+ # 17 .......Post college (17+ years)
|
|
|
|
+ # 97 .......Other
|
|
|
|
+ # 98. DK (don't know); NA (not ascertained)
|
|
|
|
+ # 99. RF (refused)
|
|
|
|
+ EDUCATION = np.nan if line[43:45] == "97" or line[43:45] == "98" or line[43:45] == "99" else line[43:45]
|
|
|
|
+ HHID_list.append(HHID)
|
|
|
|
+ PN_list.append(PN)
|
|
|
|
+ BIRTH_YEAR_list.append(BIRTH_YEAR)
|
|
|
|
+ MARITAL_STATUS_list.append(MARITAL_STATUS)
|
|
|
|
+ EDUCATION_list.append(EDUCATION)
|
|
|
|
+ data = {
|
|
|
|
+ "HHID":HHID_list,
|
|
|
|
+ "PN":PN_list,
|
|
|
|
+ "BIRTH_YEAR":BIRTH_YEAR_list,
|
|
|
|
+ "MARITAL_STATUS":MARITAL_STATUS_list,
|
|
|
|
+ "EDUCATION":EDUCATION_list,
|
|
|
|
+ }
|
|
|
|
+ data["WAVE"] = 1996
|
|
|
|
+ result_1996_one = pd.DataFrame(data)
|
|
|
|
+ with(open("/root/r_base/HRS/1996/data/H96CS_R.DA", "r", encoding="utf-8") )as file:
|
|
|
|
+ HHID_list = []
|
|
|
|
+ PN_list = []
|
|
|
|
+ SEX_list = []
|
|
|
|
+ # 逐行读取文件
|
|
|
|
+ for line in file:
|
|
|
|
+ HHID = line[0:6]
|
|
|
|
+ PN = line[6:9]
|
|
|
|
+ # 1.Male
|
|
|
|
+ # 2.Female
|
|
|
|
+ SEX = line[74:75]
|
|
|
|
+ HHID_list.append(HHID)
|
|
|
|
+ PN_list.append(PN)
|
|
|
|
+ SEX_list.append(SEX)
|
|
|
|
+ data = {
|
|
|
|
+ "HHID":HHID_list,
|
|
|
|
+ "PN":PN_list,
|
|
|
|
+ "SEX":SEX_list,
|
|
|
|
+ }
|
|
|
|
+ result_1996_two = pd.DataFrame(data)
|
|
|
|
+ result_1996 = pd.merge(result_1996_one, result_1996_two, on=["HHID", "PN"], how="left")
|
|
|
|
+ with(open("/root/r_base/HRS/1996/data/H96B_R.DA", "r", encoding="utf-8") )as file:
|
|
|
|
+ HHID_list = []
|
|
|
|
+ PN_list = []
|
|
|
|
+ SMOKED_list = []
|
|
|
|
+ DRINKED_list = []
|
|
|
|
+ PHYSICAL_ACTIVITY_LEVEL_list = []
|
|
|
|
+ BMI_list = []
|
|
|
|
+ HEART_PROBL_list = []
|
|
|
|
+ STROKE_list=[]
|
|
|
|
+ # 逐行读取文件
|
|
|
|
+ for line in file:
|
|
|
|
+ HHID = line[0:6]
|
|
|
|
+ PN = line[6:9]
|
|
|
|
+ # 1. YES
|
|
|
|
+ # 2. Cigars
|
|
|
|
+ # 3. PIPE (IF VOLUNTEERED)
|
|
|
|
+ # 5. NO
|
|
|
|
+ # 8. DK (don't know); NA (not ascertained)
|
|
|
|
+ # 9. RF (refused)
|
|
|
|
+ # 合并后
|
|
|
|
+ # 1. Yes
|
|
|
|
+ # 5. No [Inap in V502-V505]
|
|
|
|
+ SMOKED = "1" if line[160:161] == "1" or line[160:161] == "2" or line[160:161] == "3" else "5" if line[160:161] == "5" else np.nan
|
|
|
|
+ # 1. YES
|
|
|
|
+ # 3. [VOL] NEVER HAVE USED ALCOHOL
|
|
|
|
+ # 5. NO
|
|
|
|
+ # 8. DK (don't know); NA (not ascertained)
|
|
|
|
+ # 9. RF (refused)
|
|
|
|
+ # 合并后
|
|
|
|
+ # 1. Yes
|
|
|
|
+ # 5. No [Inap in V502-V505]
|
|
|
|
+ DRINKED = "1" if line[166:167] == "1" else "5" if line[166:167] == "5" or line[166:167] == "3" else np.nan
|
|
|
|
+ # 3. vigorous (vigorous activity more than once a week)
|
|
|
|
+ # 2. moderate (moderate activity more than once a week)
|
|
|
|
+ # 1. inactive (the rest)
|
|
|
|
+ # 重度活动
|
|
|
|
+ VIGOROUS_PHYSICAL = np.nan
|
|
|
|
+ # 轻度活动
|
|
|
|
+ LIGHT_PHYSICAL = np.nan
|
|
|
|
+ PHYSICAL_ACTIVITY_LEVEL = np.nan
|
|
|
|
+ # 体重
|
|
|
|
+ WEIGH= float(line[174:177])*0.45359237 if not line[174:177].strip() =="" and not float(line[174:177])>400 else np.nan
|
|
|
|
+ # 身高
|
|
|
|
+ HIGHT = line[178:180].strip()
|
|
|
|
+ if not HIGHT == "" and not HIGHT == "98" and not HIGHT == "99" and not pd.isna(WEIGH):
|
|
|
|
+ HIGHT = float(HIGHT)*0.3048 + float(line[180:182])*0.0254
|
|
|
|
+ # BMI
|
|
|
|
+ BMI = WEIGH / math.pow(HEIGHT,2)
|
|
|
|
+ else :
|
|
|
|
+ BMI = np.nan
|
|
|
|
+ # 1. YES
|
|
|
|
+ # 3. [VOL] DISPUTES W1 RECORD
|
|
|
|
+ # 5. NO
|
|
|
|
+ # 7. Other
|
|
|
|
+ # 8. DK (don't know); NA (not ascertained)
|
|
|
|
+ # 9. RF (refused)
|
|
|
|
+ # A heart attack, coronary heart disease, angina, congestive heart failure, or other heart problems
|
|
|
|
+ # 1. Yes
|
|
|
|
+ # 3. [VOL] DISPUTES W1 RECORD
|
|
|
|
+ # 5. No [Inap in V418-V421]
|
|
|
|
+ HEART_PROBL = np.nan if line[66:67] == "9" or line[66:67] == "" else line[66:67]
|
|
|
|
+ # STROKE
|
|
|
|
+ # 1. Yes
|
|
|
|
+ # 3. [VOL] DISPUTES W1 RECORD
|
|
|
|
+ # 5. No [Inap in V418-V421]
|
|
|
|
+ STROKE = np.nan if line[87:88] == "8" else "5" if line[87:88] == "2" or line[87:88] == "5" else line[87:88]
|
|
|
|
+ HHID_list.append(HHID)
|
|
|
|
+ PN_list.append(PN)
|
|
|
|
+ SMOKED_list.append(SMOKED)
|
|
|
|
+ DRINKED_list.append(DRINKED)
|
|
|
|
+ PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
|
|
|
|
+ BMI_list.append(BMI)
|
|
|
|
+ HEART_PROBL_list.append(HEART_PROBL)
|
|
|
|
+ STROKE_list.append(STROKE)
|
|
|
|
+ data = {
|
|
|
|
+ "HHID":HHID_list,
|
|
|
|
+ "PN":PN_list,
|
|
|
|
+ "SMOKED":SMOKED_list,
|
|
|
|
+ "DRINKED":DRINKED_list,
|
|
|
|
+ "PHYSICAL_ACTIVITY_LEVEL":PHYSICAL_ACTIVITY_LEVEL_list,
|
|
|
|
+ "BMI":BMI_list,
|
|
|
|
+ "HEART_PROBL":HEART_PROBL_list,
|
|
|
|
+ "STROKE":STROKE_list
|
|
|
|
+ }
|
|
|
|
+ result_1996_three = pd.DataFrame(data)
|
|
|
|
+ result_1996 = pd.merge(result_1996, result_1996_three, on=["HHID", "PN"], how="left")
|
|
|
|
+ result = pd.concat([result, result_1996], axis=0)
|
|
|
|
# Load the 1998 wave: birth year, marital status and education from H98A_R.DA.
with open("/root/r_base/HRS/1998/data/H98A_R.DA", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    BIRTH_YEAR_list = []
    MARITAL_STATUS_list = []
    EDUCATION_list = []
    # Read the fixed-width file one record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]
        # 9998 is treated as a missing birth year.
        BIRTH_YEAR = line[31:35] if not line[31:35] == "9998" else np.nan
        # Raw marital-status codes:
        #   0. DK (don't know); NA (not ascertained); RF (refused)
        #   1. MARRIED
        #   3. SEPARATED
        #   4. DIVORCED
        #   5. WIDOWED
        #   6. NEVER MARRIED
        #   7. OTHER (SPECIFY)
        MARITAL_STATUS = line[150:151]
        # Collapse to 1 = married, 5 = any other listed status; anything
        # else (including code 0) becomes NaN.
        if MARITAL_STATUS == "1":
            MARITAL_STATUS = "1"
        elif MARITAL_STATUS in ("3", "4", "5", "6", "7"):
            MARITAL_STATUS = "5"
        else:
            MARITAL_STATUS = np.nan
        # Education codes:
        #   0       no formal education
        #   1-11    grades
        #   12      high school
        #   13-15   some college
        #   16      college grad
        #   17      post college (17+ years)
        #   97      other
        #   98      DK (don't know); NA (not ascertained)
        #   99      RF (refused)
        # The codes are two digits wide, so the slice must span two columns;
        # a one-column slice can never equal "97"/"98"/"99" and would keep
        # only the first digit.
        # NOTE(review): end column 63 is inferred from the two-digit codes -
        # confirm against the 1998 codebook.
        EDUCATION = line[61:63]
        EDUCATION = np.nan if EDUCATION in ("97", "98", "99") else EDUCATION
        HHID_list.append(HHID)
        PN_list.append(PN)
        BIRTH_YEAR_list.append(BIRTH_YEAR)
        MARITAL_STATUS_list.append(MARITAL_STATUS)
        EDUCATION_list.append(EDUCATION)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "BIRTH_YEAR": BIRTH_YEAR_list,
    "MARITAL_STATUS": MARITAL_STATUS_list,
    "EDUCATION": EDUCATION_list,
}
# Scalar is broadcast to every row when the frame is built.
data["WAVE"] = 1998
result_1998_one = pd.DataFrame(data)
|
|
|
|
# Read respondent sex for 1998 from H98CS_R.DA and join it onto the
# demographics frame by (HHID, PN).
with open("/root/r_base/HRS/1998/data/H98CS_R.DA", "r", encoding="utf-8") as file:
    # One (HHID, PN, SEX) triple per fixed-width record.
    # SEX: 1 = male, 2 = female.
    rows = [(record[0:6], record[6:9], record[70:71]) for record in file]
HHID_list = [row[0] for row in rows]
PN_list = [row[1] for row in rows]
SEX_list = [row[2] for row in rows]
data = {"HHID": HHID_list, "PN": PN_list, "SEX": SEX_list}
result_1998_two = pd.DataFrame(data)
# Left join keeps every respondent from the demographics frame.
result_1998 = result_1998_one.merge(result_1998_two, on=["HHID", "PN"], how="left")
|
|
|
|
# 1998 wave, H98B_R.DA: smoking, drinking, BMI, heart problems and stroke.
with open("/root/r_base/HRS/1998/data/H98B_R.DA", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    BMI_list = []
    HEART_PROBL_list = []
    STROKE_list = []
    # Read the fixed-width file one record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]
        # Smoking: 1 = YES, 5 = NO; every other value is missing.
        SMOKED = line[157:158] if line[157:158] in ("1", "5") else np.nan
        # Drinking, raw codes:
        #   1 YES; 3 [VOL] NEVER HAVE USED ALCOHOL; 5 NO;
        #   8 DK/NA; 9 RF
        # Recoded to 1 = yes, 5 = no (3 folded into 5); the rest is missing.
        drink_code = line[184:185]
        DRINKED = "1" if drink_code == "1" else "5" if drink_code in ("3", "5") else np.nan
        # Activity scale used by the other waves: 3 vigorous, 2 moderate,
        # 1 inactive. No activity item is read from this file, so the level
        # stays missing.
        PHYSICAL_ACTIVITY_LEVEL = np.nan
        # Weight: multiplied by the lb -> kg factor. A blank field is
        # missing (float("") would raise); values above 400 are treated as
        # missing-value sentinels.
        weight_field = line[196:199].strip()
        if weight_field and not float(weight_field) > 400:
            WEIGH = float(weight_field) * 0.45359237
        else:
            WEIGH = np.nan
        # Height: feet and inches fields converted to metres.
        HEIGHT = line[200:202].strip()
        inches_field = line[202:204].strip()
        if HEIGHT not in ("", "98", "99") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT) * 0.3048
            # Blank/98/99 inches are not real measurements; fall back to
            # feet only, as the original did for 98.
            # NOTE(review): 99 assumed to be the RF sentinel - confirm.
            if inches_field not in ("", "98", "99"):
                HEIGHT += float(inches_field) * 0.0254
            # Guard against a zero height instead of dividing by zero.
            BMI = WEIGH / math.pow(HEIGHT, 2) if HEIGHT > 0 else np.nan
        else:
            BMI = np.nan
        # Heart problems (heart attack, coronary heart disease, angina,
        # congestive heart failure, or other heart problems), raw codes:
        #   1 YES; 3 [VOL] DISPUTES PREVIOUS WAVE RECORD; 5 NO;
        #   6 PRELOAD ERROR (condition reported at prior wave but said no
        #     to new event); 8 DK/NA; 9 RF
        # 8, 9 and blank are missing; other codes are kept as read.
        heart_code = line[66:67]
        HEART_PROBL = np.nan if heart_code.strip() in ("", "8", "9") else heart_code
        # Stroke, raw codes:
        #   1 YES; 2 [VOL] POSSIBLE STROKE OR TIA; 3 [VOL] DISPUTES
        #   PREVIOUS WAVE RECORD; 5 NO; 8 DK/NA; 9 RF
        # 8, 9 and blank are missing; 2 is folded into 5 (no); other codes
        # are kept as read.
        stroke_code = line[87:88]
        if stroke_code.strip() in ("", "8", "9"):
            STROKE = np.nan
        elif stroke_code in ("2", "5"):
            STROKE = "5"
        else:
            STROKE = stroke_code
        HHID_list.append(HHID)
        PN_list.append(PN)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
}
result_1998_three = pd.DataFrame(data)
result_1998 = pd.merge(result_1998, result_1998_three, on=["HHID", "PN"], how="left")
# Append the completed 1998 wave to the running result.
result = pd.concat([result, result_1998], axis=0)
|
|
|
|
# Load the 2000 wave: birth year, marital status and education from H00A_R.DA.
with open("/root/r_base/HRS/2000/data/H00A_R.DA", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    BIRTH_YEAR_list = []
    MARITAL_STATUS_list = []
    EDUCATION_list = []
    # Raw marital codes: 0 DK/NA/RF, 1 married, 3 separated, 4 divorced,
    # 5 widowed, 6 never married, 7 other (specify).
    # Collapsed to 1 = married, 5 = any other listed status; codes not in
    # the table become NaN.
    marital_recode = {"1": "1", "3": "5", "4": "5", "5": "5", "6": "5", "7": "5"}
    # Education: 0 no formal education, 1-11 grades, 12 high school,
    # 13-15 some college, 16 college grad, 17 post college (17+ years).
    # 97 (other), 98 (DK/NA) and 99 (RF) are stored as NaN.
    education_missing = ("97", "98", "99")
    # Walk the fixed-width file one record at a time.
    for record in file:
        schooling = record[63:65]
        HHID_list.append(record[0:6])
        PN_list.append(record[6:9])
        BIRTH_YEAR_list.append(record[31:35])
        MARITAL_STATUS_list.append(marital_recode.get(record[152:153], np.nan))
        EDUCATION_list.append(np.nan if schooling in education_missing else schooling)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "BIRTH_YEAR": BIRTH_YEAR_list,
    "MARITAL_STATUS": MARITAL_STATUS_list,
    "EDUCATION": EDUCATION_list,
    "WAVE": 2000,
}
result_2000_one = pd.DataFrame(data)
|
|
|
|
# Read respondent sex for 2000 from H00CS_R.DA and join it onto the
# demographics frame by (HHID, PN).
with open("/root/r_base/HRS/2000/data/H00CS_R.DA", "r", encoding="utf-8") as file:
    # One (HHID, PN, SEX) triple per fixed-width record.
    # SEX: 1 = male, 2 = female.
    rows = [(record[0:6], record[6:9], record[79:80]) for record in file]
HHID_list = [row[0] for row in rows]
PN_list = [row[1] for row in rows]
SEX_list = [row[2] for row in rows]
data = {"HHID": HHID_list, "PN": PN_list, "SEX": SEX_list}
result_2000_two = pd.DataFrame(data)
# Left join keeps every respondent from the demographics frame.
result_2000 = result_2000_one.merge(result_2000_two, on=["HHID", "PN"], how="left")
|
|
|
|
# 2000 wave, H00B_R.DA: smoking, drinking, BMI, heart problems and stroke.
with open("/root/r_base/HRS/2000/data/H00B_R.DA", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    BMI_list = []
    HEART_PROBL_list = []
    STROKE_list = []
    # Condition recodes to 1 = yes / 5 = no. Raw codes:
    #   1 YES
    #   2 [VOL] POSSIBLE STROKE OR TIA (stroke item only)
    #   3 DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
    #   4 DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
    #   5 NO
    #   8 DK/NA; 9 RF
    # Only listed codes are mapped. Blank, 8, 9 and any unexpected value
    # become NaN instead of falling through to "no" (heart) or "yes"
    # (stroke).
    heart_recode = {"1": "1", "3": "1", "4": "5", "5": "5"}
    stroke_recode = {"1": "1", "3": "1", "2": "5", "4": "5", "5": "5"}
    # Read the fixed-width file one record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]
        # Smoking: 1 = YES, 5 = NO; every other value is missing.
        SMOKED = line[154:155] if line[154:155] in ("1", "5") else np.nan
        # Drinking: 1 YES; 3 [VOL] NEVER HAVE USED ALCOHOL; 5 NO; 8 DK/NA;
        # 9 RF. Recoded to 1 = yes, 5 = no (3 folded into 5).
        drink_code = line[181:182]
        DRINKED = "1" if drink_code == "1" else "5" if drink_code in ("3", "5") else np.nan
        # Activity scale used by the other waves: 3 vigorous, 2 moderate,
        # 1 inactive. No activity item is read from this file, so the level
        # stays missing.
        PHYSICAL_ACTIVITY_LEVEL = np.nan
        # Weight: multiplied by the lb -> kg factor. A blank field is
        # missing (float("") would raise); values above 400 are treated as
        # missing-value sentinels.
        weight_field = line[193:196].strip()
        if weight_field and not float(weight_field) > 400:
            WEIGH = float(weight_field) * 0.45359237
        else:
            WEIGH = np.nan
        # Height: feet and inches fields converted to metres.
        HEIGHT = line[197:198].strip()
        inches_field = line[198:200].strip()
        # NOTE(review): 8 is assumed to be the DK sentinel for feet, as in
        # the 2002+ blocks - confirm against the codebook.
        if HEIGHT not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT) * 0.3048
            # Blank/98/99 inches are not real measurements; use feet only,
            # matching how the other waves handle unknown inches.
            if inches_field not in ("", "98", "99"):
                HEIGHT += float(inches_field) * 0.0254
            # Guard against a zero height instead of dividing by zero.
            BMI = WEIGH / math.pow(HEIGHT, 2) if HEIGHT > 0 else np.nan
        else:
            BMI = np.nan
        # Heart attack, coronary heart disease, angina, congestive heart
        # failure, or other heart problems.
        HEART_PROBL = heart_recode.get(line[65:66], np.nan)
        STROKE = stroke_recode.get(line[86:87], np.nan)
        HHID_list.append(HHID)
        PN_list.append(PN)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
}
result_2000_three = pd.DataFrame(data)
result_2000 = pd.merge(result_2000, result_2000_three, on=["HHID", "PN"], how="left")
# Append the completed 2000 wave to the running result.
result = pd.concat([result, result_2000], axis=0)
|
|
|
|
# Load the 2002 wave: birth year and sex from H02PR_R.da.
with open("/root/r_base/HRS/2002/data/H02PR_R.da", "r", encoding="utf-8") as file:
    # One (HHID, PN, BIRTH_YEAR, SEX) tuple per fixed-width record.
    # SEX: 1 = male, 2 = female.
    rows = [(record[0:6], record[6:9], record[36:40], record[19:20]) for record in file]
HHID_list = [row[0] for row in rows]
PN_list = [row[1] for row in rows]
BIRTH_YEAR_list = [row[2] for row in rows]
SEX_list = [row[3] for row in rows]
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "BIRTH_YEAR": BIRTH_YEAR_list,
    "SEX": SEX_list,
    # Scalar wave label, broadcast to every row.
    "WAVE": 2002,
}
result_2002_one = pd.DataFrame(data)
|
|
|
|
# 2002 wave, H02C_R.da: smoking, drinking, BMI, heart problems and stroke.
with open("/root/r_base/HRS/2002/data/H02C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    STROKE_list = []
    # Condition recodes to 1 = yes / 5 = no. Raw codes:
    #   1 YES
    #   2 [VOL] POSSIBLE STROKE OR TIA (stroke item only)
    #   3 DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
    #   4 DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
    #   5 NO
    #   8 DK/NA; 9 RF
    # Only listed codes are mapped. Blank, 8, 9 and any unexpected value
    # become NaN instead of falling through to "no" (heart) or "yes"
    # (stroke).
    heart_recode = {"1": "1", "3": "1", "4": "5", "5": "5"}
    stroke_recode = {"1": "1", "3": "1", "2": "5", "4": "5", "5": "5"}
    # Read the fixed-width file one record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]
        # Smoking: 1 = YES, 5 = NO; every other value is missing.
        SMOKED = line[155:156] if line[155:156] in ("1", "5") else np.nan
        # Drinking: 1 YES; 3 [VOL] NEVER HAVE USED ALCOHOL; 5 NO; 8 DK/NA;
        # 9 RF. Recoded to 1 = yes, 5 = no (3 folded into 5).
        drink_code = line[179:180]
        DRINKED = "1" if drink_code == "1" else "5" if drink_code in ("3", "5") else np.nan
        # Weight: multiplied by the lb -> kg factor. A blank field is
        # missing (float("") would raise); values above 400 are treated as
        # missing-value sentinels.
        weight_field = line[190:193].strip()
        if weight_field and not float(weight_field) > 400:
            WEIGH = float(weight_field) * 0.45359237
        else:
            WEIGH = np.nan
        # Height: feet and inches fields converted to metres.
        HEIGHT = line[194:195].strip()
        # Strip before comparing: an unstripped blank two-column field is
        # "  ", never "", so a blank check on the raw slice cannot fire.
        inches_field = line[195:197].strip()
        # NOTE(review): 9 is assumed to be the RF sentinel for feet -
        # confirm against the codebook.
        if HEIGHT not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT) * 0.3048
            # Blank/98/99 inches are not real measurements; use feet only.
            if inches_field not in ("", "98", "99"):
                HEIGHT += float(inches_field) * 0.0254
            # Guard against a zero height instead of dividing by zero.
            BMI = WEIGH / math.pow(HEIGHT, 2) if HEIGHT > 0 else np.nan
        else:
            BMI = np.nan
        # Heart attack, coronary heart disease, angina, congestive heart
        # failure, or other heart problems.
        HEART_PROBL = heart_recode.get(line[59:60], np.nan)
        STROKE = stroke_recode.get(line[80:81], np.nan)
        HHID_list.append(HHID)
        PN_list.append(PN)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
}
result_2002_two = pd.DataFrame(data)
# Left join keeps every respondent from the birth-year/sex frame.
result_2002 = pd.merge(result_2002_one, result_2002_two, on=["HHID", "PN"], how="left")
|
|
|
|
# 2002 wave, H02V_R.da: derive the physical-activity level.
with open("/root/r_base/HRS/2002/data/H02V_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    # Raw frequency codes for both activity items:
    #   1 MORE THAN ONCE A WEEK
    #   2 ONCE A WEEK
    #   3 ONCE TO THREE TIMES A MONTH
    #   4 HARDLY EVER OR NEVER
    #   8 DK (Don't Know); 9 RF (Refused)
    weekly_codes = ("1", "2")
    rarer_codes = ("3", "4")
    # Light activity is not read from this file.
    LIGHT_PHYSICAL = np.nan
    # Walk the fixed-width file one record at a time.
    for record in file:
        vigorous_code = record[151:152]
        moderate_code = record[152:153]
        # Output scale: 3 vigorous, 2 moderate, 1 inactive.
        # Vigorous activity at codes 1/2 wins, then moderate activity at
        # codes 1/2; a valid but rarer answer on either item gives 1;
        # otherwise the level is missing.
        if vigorous_code in weekly_codes:
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif moderate_code in weekly_codes:
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif vigorous_code in rarer_codes or moderate_code in rarer_codes:
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan
        HHID_list.append(record[0:6])
        PN_list.append(record[6:9])
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2002_three = pd.DataFrame(data)
result_2002 = result_2002.merge(result_2002_three, on=["HHID", "PN"], how="left")
|
|
|
|
# 2002 wave, H02B_R.da: marital status and education; this closes the wave.
with open("/root/r_base/HRS/2002/data/H02B_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    MARITAL_STATUS_list = []
    EDUCATION_list = []
    # Raw marital codes: 1 married, 3 separated, 4 divorced, 5 widowed,
    # 6 never married, 7 other (specify), 8 DK, 9 RF.
    # Collapsed to 1 = married, 5 = any other listed status; codes not in
    # the table become NaN.
    marital_recode = {"1": "1", "3": "5", "4": "5", "5": "5", "6": "5", "7": "5"}
    # Education: 0 no formal education, 1-11 grades, 12 high school,
    # 13-15 some college, 16 college grad, 17 post college (17+ years).
    # 97 (other), 98 (DK/NA) and 99 (RF) are stored as NaN.
    education_missing = ("97", "98", "99")
    # Walk the fixed-width file one record at a time.
    for record in file:
        schooling = record[42:44]
        HHID_list.append(record[0:6])
        PN_list.append(record[6:9])
        MARITAL_STATUS_list.append(marital_recode.get(record[131:132], np.nan))
        EDUCATION_list.append(np.nan if schooling in education_missing else schooling)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "MARITAL_STATUS": MARITAL_STATUS_list,
    "EDUCATION": EDUCATION_list,
}
result_2002_four = pd.DataFrame(data)
result_2002 = result_2002.merge(result_2002_four, on=["HHID", "PN"], how="left")
# Append the completed 2002 wave to the running result.
result = pd.concat([result, result_2002], axis=0)
|
|
|
|
# Load the 2004 wave: birth year and sex from H04PR_R.da.
with open("/root/r_base/HRS/2004/data/H04PR_R.da", "r", encoding="utf-8") as file:
    # One (HHID, PN, BIRTH_YEAR, SEX) tuple per fixed-width record.
    # SEX: 1 = male, 2 = female.
    rows = [(record[0:6], record[6:9], record[25:29], record[20:21]) for record in file]
HHID_list = [row[0] for row in rows]
PN_list = [row[1] for row in rows]
BIRTH_YEAR_list = [row[2] for row in rows]
SEX_list = [row[3] for row in rows]
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "BIRTH_YEAR": BIRTH_YEAR_list,
    "SEX": SEX_list,
    # Scalar wave label, broadcast to every row.
    "WAVE": 2004,
}
result_2004_one = pd.DataFrame(data)
|
|
|
|
# 2004 wave, H04C_R.da: smoking, drinking, BMI, heart problems, stroke and
# physical-activity level.
with open("/root/r_base/HRS/2004/data/H04C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    STROKE_list = []
    # Condition recodes to 1 = yes / 5 = no. Raw codes:
    #   1 YES
    #   2 [VOL] POSSIBLE STROKE OR TIA (stroke item only)
    #   3 DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
    #   4 DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
    #   5 NO
    #   8 DK/NA; 9 RF
    # Only listed codes are mapped. Blank, 8, 9 and any unexpected value
    # become NaN instead of falling through to "no" (heart) or "yes"
    # (stroke).
    heart_recode = {"1": "1", "3": "1", "4": "5", "5": "5"}
    stroke_recode = {"1": "1", "3": "1", "2": "5", "4": "5", "5": "5"}
    # Read the fixed-width file one record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]
        # Smoking: 1 = YES, 5 = NO; every other value is missing.
        SMOKED = line[167:168] if line[167:168] in ("1", "5") else np.nan
        # Drinking: 1 YES; 3 [VOL] NEVER HAVE USED ALCOHOL; 5 NO; 8 DK/NA;
        # 9 RF. Recoded to 1 = yes, 5 = no (3 folded into 5).
        drink_code = line[192:193]
        DRINKED = "1" if drink_code == "1" else "5" if drink_code in ("3", "5") else np.nan
        # Weight: multiplied by the lb -> kg factor. A blank field is
        # missing (float("") would raise); values above 400 are treated as
        # missing-value sentinels.
        weight_field = line[203:206].strip()
        if weight_field and not float(weight_field) > 400:
            WEIGH = float(weight_field) * 0.45359237
        else:
            WEIGH = np.nan
        # Height: feet and inches fields converted to metres.
        HEIGHT = line[222:223].strip()
        # Strip before comparing: an unstripped blank two-column field is
        # "  ", never "", so a blank check on the raw slice cannot fire.
        inches_field = line[223:225].strip()
        # NOTE(review): 9 is assumed to be the RF sentinel for feet -
        # confirm against the codebook.
        if HEIGHT not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT) * 0.3048
            # Blank/98/99 inches are not real measurements; use feet only.
            if inches_field not in ("", "98", "99"):
                HEIGHT += float(inches_field) * 0.0254
            # Guard against a zero height instead of dividing by zero.
            BMI = WEIGH / math.pow(HEIGHT, 2) if HEIGHT > 0 else np.nan
        else:
            BMI = np.nan
        # Heart attack, coronary heart disease, angina, congestive heart
        # failure, or other heart problems.
        HEART_PROBL = heart_recode.get(line[69:70], np.nan)
        STROKE = stroke_recode.get(line[90:91], np.nan)
        # Activity frequency codes (both items):
        #   1 MORE THAN ONCE A WEEK; 2 ONCE A WEEK;
        #   3 ONCE TO THREE TIMES A MONTH; 4 HARDLY EVER OR NEVER;
        #   7 (VOL) EVERY DAY; 8 DK; 9 RF
        # Output scale: 3 vigorous, 2 moderate, 1 inactive.
        vigorous_code = line[164:165]
        moderate_code = line[165:166]
        if vigorous_code in ("1", "2", "7"):
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif moderate_code in ("1", "2", "7"):
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif vigorous_code in ("3", "4") or moderate_code in ("3", "4"):
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan
        HHID_list.append(HHID)
        PN_list.append(PN)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2004_two = pd.DataFrame(data)
# Left join keeps every respondent from the birth-year/sex frame.
result_2004 = pd.merge(result_2004_one, result_2004_two, on=["HHID", "PN"], how="left")
|
|
|
|
# 2004 wave, H04B_R.da: marital status and education; this closes the wave.
with open("/root/r_base/HRS/2004/data/H04B_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    MARITAL_STATUS_list = []
    EDUCATION_list = []
    # Raw marital codes: 1 married (VOL), 2 annulled (VOL), 3 separated,
    # 4 divorced, 5 widowed, 6 never married, 7 other (specify).
    # Collapsed to 1 = married, 5 = any other listed status; codes not in
    # the table become NaN.
    marital_recode = {"1": "1", "2": "5", "3": "5", "4": "5", "5": "5", "6": "5", "7": "5"}
    # Education: 0 no formal education, 1-11 grades, 12 high school,
    # 13-15 some college, 16 college grad, 17 post college (17+ years).
    # 97 (other), 98 (DK/NA) and 99 (RF) are stored as NaN.
    education_missing = ("97", "98", "99")
    # Walk the fixed-width file one record at a time.
    for record in file:
        schooling = record[41:43]
        HHID_list.append(record[0:6])
        PN_list.append(record[6:9])
        MARITAL_STATUS_list.append(marital_recode.get(record[161:162], np.nan))
        EDUCATION_list.append(np.nan if schooling in education_missing else schooling)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "MARITAL_STATUS": MARITAL_STATUS_list,
    "EDUCATION": EDUCATION_list,
}
result_2004_four = pd.DataFrame(data)
result_2004 = result_2004.merge(result_2004_four, on=["HHID", "PN"], how="left")
# Append the completed 2004 wave to the running result.
result = pd.concat([result, result_2004], axis=0)
|
|
|
|
# Load the 2006 wave: birth year and sex from H06PR_R.da.
with open("/root/r_base/HRS/2006/data/H06PR_R.da", "r", encoding="utf-8") as file:
    # One (HHID, PN, BIRTH_YEAR, SEX) tuple per fixed-width record.
    # SEX: 1 = male, 2 = female.
    rows = [(record[0:6], record[6:9], record[25:29], record[20:21]) for record in file]
HHID_list = [row[0] for row in rows]
PN_list = [row[1] for row in rows]
BIRTH_YEAR_list = [row[2] for row in rows]
SEX_list = [row[3] for row in rows]
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "BIRTH_YEAR": BIRTH_YEAR_list,
    "SEX": SEX_list,
    # Scalar wave label, broadcast to every row.
    "WAVE": 2006,
}
result_2006_one = pd.DataFrame(data)
|
|
|
|
# 2006 wave, H06C_R.da: smoking, drinking, BMI, heart problems, stroke and
# physical-activity level.
with open("/root/r_base/HRS/2006/data/H06C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    STROKE_list = []
    # Condition recodes to 1 = yes / 5 = no. Raw codes:
    #   1 YES
    #   2 [VOL] POSSIBLE STROKE OR TIA (stroke item only)
    #   3 DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
    #   4 DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
    #   5 NO
    #   8 DK/NA; 9 RF
    # Only listed codes are mapped. A blank column reads as " " (not ""),
    # so an equality test against "" never catches it; with a lookup table
    # blank, 8, 9 and any unexpected value all become NaN.
    heart_recode = {"1": "1", "3": "1", "4": "5", "5": "5"}
    stroke_recode = {"1": "1", "3": "1", "2": "5", "4": "5", "5": "5"}
    # Read the fixed-width file one record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]
        # Smoking: 1 = YES, 5 = NO; every other value is missing.
        SMOKED = line[181:182]
        SMOKED = SMOKED if SMOKED in ("1", "5") else np.nan
        # Drinking: 1 YES; 3 [VOL] NEVER HAVE USED ALCOHOL; 5 NO; 8 DK/NA;
        # 9 RF. Recoded to 1 = yes, 5 = no (3 folded into 5).
        DRINKED = line[207:208]
        DRINKED = "1" if DRINKED == "1" else "5" if DRINKED in ("3", "5") else np.nan
        # Weight: multiplied by the lb -> kg factor. Blank is missing;
        # values above 400 are treated as missing-value sentinels.
        WEIGH = line[218:221].strip()
        WEIGH = float(WEIGH) * 0.45359237 if WEIGH and not float(WEIGH) > 400 else np.nan
        # Height: feet and inches fields converted to metres. Both fields
        # are stripped so that blank columns compare equal to "" and never
        # reach float().
        HEIGHT_FEET = line[252:253].strip()
        HEIGHT_INCHES = line[253:255].strip()
        if HEIGHT_FEET not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT_FEET) * 0.3048
            # Blank/98/99 inches are not real measurements; use feet only.
            # NOTE(review): 99 assumed to be the RF sentinel - confirm.
            if HEIGHT_INCHES not in ("", "98", "99"):
                HEIGHT += float(HEIGHT_INCHES) * 0.0254
            # Guard against a zero height instead of dividing by zero.
            BMI = WEIGH / math.pow(HEIGHT, 2) if HEIGHT > 0 else np.nan
        else:
            BMI = np.nan
        # Heart attack, coronary heart disease, angina, congestive heart
        # failure, or other heart problems.
        HEART_PROBL = heart_recode.get(line[80:81], np.nan)
        STROKE = stroke_recode.get(line[101:102], np.nan)
        # Activity frequency codes (both items):
        #   1 MORE THAN ONCE A WEEK; 2 ONCE A WEEK;
        #   3 ONCE TO THREE TIMES A MONTH; 4 HARDLY EVER OR NEVER;
        #   7 (VOL) EVERY DAY; 8 DK; 9 RF
        # Output scale: 3 vigorous, 2 moderate, 1 inactive.
        vigorous_code = line[178:179]
        moderate_code = line[179:180]
        if vigorous_code in ("1", "2", "7"):
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif moderate_code in ("1", "2", "7"):
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif vigorous_code in ("3", "4") or moderate_code in ("3", "4"):
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan
        HHID_list.append(HHID)
        PN_list.append(PN)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2006_two = pd.DataFrame(data)
# Left join keeps every respondent from the birth-year/sex frame.
result_2006 = pd.merge(result_2006_one, result_2006_two, on=["HHID", "PN"], how="left")
|
|
|
|
# 2006 wave, H06B_R.da: marital status and education; this closes the wave.
with open("/root/r_base/HRS/2006/data/H06B_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    MARITAL_STATUS_list = []
    EDUCATION_list = []
    # Raw marital codes: 1 married (VOL), 2 annulled (VOL), 3 separated,
    # 4 divorced, 5 widowed, 6 never married, 7 other (specify),
    # 8 DK/NA, 9 RF.
    # Collapsed to 1 = married, 5 = any other listed status; codes not in
    # the table become NaN.
    marital_recode = {"1": "1", "2": "5", "3": "5", "4": "5", "5": "5", "6": "5", "7": "5"}
    # Education: 0 no formal education, 1-11 grades, 12 high school,
    # 13-15 some college, 16 college grad, 17 post college (17+ years).
    # 97 (other), 98 (DK/NA) and 99 (RF) are stored as NaN.
    education_missing = ("97", "98", "99")
    # Walk the fixed-width file one record at a time.
    for record in file:
        schooling = record[47:49]
        HHID_list.append(record[0:6])
        PN_list.append(record[6:9])
        MARITAL_STATUS_list.append(marital_recode.get(record[177:178], np.nan))
        EDUCATION_list.append(np.nan if schooling in education_missing else schooling)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "MARITAL_STATUS": MARITAL_STATUS_list,
    "EDUCATION": EDUCATION_list,
}
result_2006_four = pd.DataFrame(data)
result_2006 = result_2006.merge(result_2006_four, on=["HHID", "PN"], how="left")
# Append the completed 2006 wave to the running result.
result = pd.concat([result, result_2006], axis=0)
|
|
|
|
# Fetch the 2008 data, starting with the H08PR_R file (birth year and sex).
with open("/root/r_base/HRS/2008/data/H08PR_R.da", "r", encoding="utf-8") as file:
    HHID_list, PN_list = [], []
    BIRTH_YEAR_list, SEX_list = [], []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID, PN = line[0:6], line[6:9]
        BIRTH_YEAR = line[25:29]
        # Sex codes: 1 = male, 2 = female.
        SEX = line[20:21]
        HHID_list.append(HHID)
        PN_list.append(PN)
        BIRTH_YEAR_list.append(BIRTH_YEAR)
        SEX_list.append(SEX)
# WAVE is a scalar; pandas repeats it for every row.
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "BIRTH_YEAR": BIRTH_YEAR_list,
    "SEX": SEX_list,
    "WAVE": 2008,
}
result_2008_one = pd.DataFrame(data)
|
|
|
|
# Read the 2008 health file (H08C_R): smoking, drinking, BMI, heart problems,
# stroke and physical-activity level.
with open("/root/r_base/HRS/2008/data/H08C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    STROKE_list = []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Smoking: keep 1 (YES) / 5 (NO); everything else is missing.
        SMOKED = line[258:259]
        SMOKED = SMOKED if SMOKED in ("1", "5") else np.nan

        # Drinking raw codes: 1 YES, 3 [VOL] NEVER HAVE USED ALCOHOL, 5 NO,
        # 8 DK / NA, 9 RF.  Recode: 1 -> "1" (yes); 3 or 5 -> "5" (no);
        # anything else -> NaN.
        DRINKED = line[284:285]
        if DRINKED == "1":
            DRINKED = "1"
        elif DRINKED in ("3", "5"):
            DRINKED = "5"
        else:
            DRINKED = np.nan

        # Weight, read in pounds and converted to kilograms.  The field is
        # stripped first so a blank weight becomes NaN instead of crashing
        # float(); values above 400 are treated as missing.
        WEIGH = line[295:298].strip()
        WEIGH = float(WEIGH) * 0.45359237 if WEIGH != "" and float(WEIGH) <= 400 else np.nan

        # Height in feet + inches, converted to metres.
        HEIGHT_FEET = line[329:330].strip()
        HEIGHT_INCHES = line[330:338].strip()
        if HEIGHT_FEET not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT_FEET) * 0.3048
            # Blank or sentinel inches: fall back to the feet component only.
            if HEIGHT_INCHES not in ("", "99998"):
                HEIGHT += float(HEIGHT_INCHES) * 0.0254
            # Guard against a zero height so BMI never divides by zero.
            BMI = WEIGH / math.pow(HEIGHT, 2) if HEIGHT > 0 else np.nan
        else:
            BMI = np.nan

        # Heart problems (heart attack, coronary heart disease, angina,
        # congestive heart failure, or other heart problems).  Raw codes:
        #   1 YES, 3 DISPUTES PREVIOUS WAVE RECORD BUT NOW HAS CONDITION,
        #   4 DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION,
        #   5 NO, 8 DK / NA, 9 RF.
        # Recode: blank/8/9 -> NaN; 1 or 3 -> "1" (yes); otherwise "5" (no).
        # The field is stripped so a blank answer is missing, not "no".
        HEART_PROBL = line[128:129].strip()
        if HEART_PROBL in ("", "8", "9"):
            HEART_PROBL = np.nan
        elif HEART_PROBL in ("1", "3"):
            HEART_PROBL = "1"
        else:
            HEART_PROBL = "5"

        # Stroke.  Raw codes: 1 YES, 2 [VOL] POSSIBLE STROKE OR TIA,
        # 3 DISPUTES BUT NOW HAS CONDITION, 4 DISPUTES, DOES NOT HAVE
        # CONDITION, 5 NO, 8 DK / NA, 9 RF.
        # Recode: blank/8/9 -> NaN; 2, 4 or 5 -> "5" (no); otherwise "1" (yes).
        # The field is stripped so a blank answer is missing, not "yes".
        STROKE = line[149:150].strip()
        if STROKE in ("", "8", "9"):
            STROKE = np.nan
        elif STROKE in ("2", "4", "5"):
            STROKE = "5"
        else:
            STROKE = "1"

        # Activity frequency raw codes: 1 MORE THAN ONCE A WEEK, 2 ONCE A
        # WEEK, 3 ONCE TO THREE TIMES A MONTH, 4 HARDLY EVER OR NEVER,
        # 7 (VOL) EVERY DAY, 8 DK, 9 RF.
        # Vigorous activity: codes 1/2/7 -> "3"; codes 3/4 -> "1"; else NaN.
        VIGOROUS_PHYSICAL = line[255:256]
        if VIGOROUS_PHYSICAL in ("1", "2", "7"):
            VIGOROUS_PHYSICAL = "3"
        elif VIGOROUS_PHYSICAL in ("3", "4"):
            VIGOROUS_PHYSICAL = "1"
        else:
            VIGOROUS_PHYSICAL = np.nan
        # Moderate activity: codes 1/2/7 -> "2"; codes 3/4 -> "1"; else NaN.
        MODERATE_PHYSICAL = line[256:257]
        if MODERATE_PHYSICAL in ("1", "2", "7"):
            MODERATE_PHYSICAL = "2"
        elif MODERATE_PHYSICAL in ("3", "4"):
            MODERATE_PHYSICAL = "1"
        else:
            MODERATE_PHYSICAL = np.nan

        # Combined level: 3 vigorous, 2 moderate, 1 inactive, NaN if neither
        # question has a usable answer.
        if VIGOROUS_PHYSICAL == "3":
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif MODERATE_PHYSICAL == "2":
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif VIGOROUS_PHYSICAL == "1" or MODERATE_PHYSICAL == "1":
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2008_two = pd.DataFrame(data)
# Left-join the health variables onto the 2008 birth-year/sex frame.
result_2008 = pd.merge(result_2008_one, result_2008_two, on=["HHID", "PN"], how="left")
|
|
|
|
# Read the 2008 demographics file (H08B_R): marital status and education.
with open("/root/r_base/HRS/2008/data/H08B_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    MARITAL_STATUS_list = []
    EDUCATION_list = []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Raw marital-status codes:
        #   1. MARRIED (VOL)        2. ANNULLED (VOL)   3. SEPARATED
        #   4. DIVORCED             5. WIDOWED          6. NEVER MARRIED
        #   7. OTHER (SPECIFY)      8. DK / NA          9. RF (refused)
        # Recode: raw "1" -> "1"; raw "2".."7" -> "5" (other); anything else
        # (8, 9, blank) -> NaN.
        MARITAL_STATUS = line[311:312]
        if MARITAL_STATUS == "1":
            MARITAL_STATUS = "1"
        elif MARITAL_STATUS in ("2", "3", "4", "5", "6", "7"):
            MARITAL_STATUS = "5"
        else:
            MARITAL_STATUS = np.nan

        # Raw education codes (years of schooling):
        #   0 no formal education, 1-11 grades, 12 high school,
        #   13-15 some college, 16 college grad, 17 post college (17+ years),
        #   97 other, 98 DK / NA, 99 RF (refused)
        EDUCATION = line[48:50]
        # A blank fixed-width field is made of spaces and never equals "",
        # so test the stripped text; blank and 97/98/99 become NaN.  Valid
        # values are kept exactly as read.
        if EDUCATION.strip() in ("", "97", "98", "99"):
            EDUCATION = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        MARITAL_STATUS_list.append(MARITAL_STATUS)
        EDUCATION_list.append(EDUCATION)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "MARITAL_STATUS": MARITAL_STATUS_list,
    "EDUCATION": EDUCATION_list,
}
result_2008_four = pd.DataFrame(data)
# Attach the demographics to the 2008 frame, then append the wave to the
# running result.
result_2008 = pd.merge(result_2008, result_2008_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2008], axis=0)
|
|
|
|
# Fetch the 2010 data, starting with the H10PR_R file (birth year and sex).
with open("/root/r_base/HRS/2010/data/H10PR_R.da", "r", encoding="utf-8") as file:
    HHID_list, PN_list = [], []
    BIRTH_YEAR_list, SEX_list = [], []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID, PN = line[0:6], line[6:9]
        BIRTH_YEAR = line[25:29]
        # Sex codes: 1 = male, 2 = female.
        SEX = line[20:21]
        HHID_list.append(HHID)
        PN_list.append(PN)
        BIRTH_YEAR_list.append(BIRTH_YEAR)
        SEX_list.append(SEX)
# WAVE is a scalar; pandas repeats it for every row.
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "BIRTH_YEAR": BIRTH_YEAR_list,
    "SEX": SEX_list,
    "WAVE": 2010,
}
result_2010_one = pd.DataFrame(data)
|
|
|
|
# Read the 2010 health file (H10C_R): smoking, drinking, BMI, heart problems,
# stroke and physical-activity level.
with open("/root/r_base/HRS/2010/data/H10C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    STROKE_list = []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Smoking: keep 1 (YES) / 5 (NO); everything else is missing.
        SMOKED = line[250:251]
        SMOKED = SMOKED if SMOKED in ("1", "5") else np.nan

        # Drinking raw codes: 1 YES, 3 [VOL] NEVER HAVE USED ALCOHOL, 5 NO,
        # 8 DK / NA, 9 RF.  Recode: 1 -> "1" (yes); 3 or 5 -> "5" (no);
        # anything else -> NaN.
        DRINKED = line[276:277]
        if DRINKED == "1":
            DRINKED = "1"
        elif DRINKED in ("3", "5"):
            DRINKED = "5"
        else:
            DRINKED = np.nan

        # Weight, read in pounds and converted to kilograms; blank or
        # values above 400 are treated as missing.
        WEIGH = line[287:290].strip()
        WEIGH = float(WEIGH) * 0.45359237 if WEIGH != "" and float(WEIGH) <= 400 else np.nan

        # Height in feet + inches, converted to metres.  Both fields are
        # stripped so the blank / sentinel comparisons can actually match.
        HEIGHT_FEET = line[304:305].strip()
        HEIGHT_INCHES = line[305:313].strip()
        if HEIGHT_FEET not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT_FEET) * 0.3048
            # Blank or sentinel inches: fall back to the feet component only.
            if HEIGHT_INCHES not in ("", "99998"):
                HEIGHT += float(HEIGHT_INCHES) * 0.0254
            # Guard against a zero height so BMI never divides by zero.
            BMI = WEIGH / math.pow(HEIGHT, 2) if HEIGHT > 0 else np.nan
        else:
            BMI = np.nan

        # Heart problems (heart attack, coronary heart disease, angina,
        # congestive heart failure, or other heart problems).  Raw codes:
        #   1 YES, 3 DISPUTES PREVIOUS WAVE RECORD BUT NOW HAS CONDITION,
        #   4 DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION,
        #   5 NO, 8 DK / NA, 9 RF.
        # Recode: blank/8/9 -> NaN; 1 or 3 -> "1" (yes); otherwise "5" (no).
        # The field is stripped so a blank answer is missing, not "no".
        HEART_PROBL = line[76:77].strip()
        if HEART_PROBL in ("", "8", "9"):
            HEART_PROBL = np.nan
        elif HEART_PROBL in ("1", "3"):
            HEART_PROBL = "1"
        else:
            HEART_PROBL = "5"

        # Stroke.  Raw codes: 1 YES, 2 [VOL] POSSIBLE STROKE OR TIA,
        # 3 DISPUTES BUT NOW HAS CONDITION, 4 DISPUTES, DOES NOT HAVE
        # CONDITION, 5 NO, 8 DK / NA, 9 RF.
        # Recode: blank/8/9 -> NaN; 2, 4 or 5 -> "5" (no); otherwise "1" (yes).
        # The field is stripped so a blank answer is missing, not "yes".
        STROKE = line[139:140].strip()
        if STROKE in ("", "8", "9"):
            STROKE = np.nan
        elif STROKE in ("2", "4", "5"):
            STROKE = "5"
        else:
            STROKE = "1"

        # Activity frequency raw codes: 1 MORE THAN ONCE A WEEK, 2 ONCE A
        # WEEK, 3 ONCE TO THREE TIMES A MONTH, 4 HARDLY EVER OR NEVER,
        # 7 (VOL) EVERY DAY, 8 DK, 9 RF.
        # Vigorous activity: codes 1/2/7 -> "3"; codes 3/4 -> "1"; else NaN.
        VIGOROUS_PHYSICAL = line[247:248]
        if VIGOROUS_PHYSICAL in ("1", "2", "7"):
            VIGOROUS_PHYSICAL = "3"
        elif VIGOROUS_PHYSICAL in ("3", "4"):
            VIGOROUS_PHYSICAL = "1"
        else:
            VIGOROUS_PHYSICAL = np.nan
        # Moderate activity: codes 1/2/7 -> "2"; codes 3/4 -> "1"; else NaN.
        MODERATE_PHYSICAL = line[248:249]
        if MODERATE_PHYSICAL in ("1", "2", "7"):
            MODERATE_PHYSICAL = "2"
        elif MODERATE_PHYSICAL in ("3", "4"):
            MODERATE_PHYSICAL = "1"
        else:
            MODERATE_PHYSICAL = np.nan

        # Combined level: 3 vigorous, 2 moderate, 1 inactive, NaN if neither
        # question has a usable answer.
        if VIGOROUS_PHYSICAL == "3":
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif MODERATE_PHYSICAL == "2":
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif VIGOROUS_PHYSICAL == "1" or MODERATE_PHYSICAL == "1":
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2010_two = pd.DataFrame(data)
# Left-join the health variables onto the 2010 birth-year/sex frame.
result_2010 = pd.merge(result_2010_one, result_2010_two, on=["HHID", "PN"], how="left")
|
|
|
|
# Read the 2010 demographics file (H10B_R): marital status and education.
with open("/root/r_base/HRS/2010/data/H10B_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    MARITAL_STATUS_list = []
    EDUCATION_list = []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Raw marital-status codes:
        #   1. MARRIED (VOL)        2. ANNULLED (VOL)   3. SEPARATED
        #   4. DIVORCED             5. WIDOWED          6. NEVER MARRIED
        #   7. OTHER (SPECIFY)      8. DK / NA          9. RF (refused)
        # Recode: raw "1" -> "1"; raw "2".."7" -> "5" (other); anything else
        # (8, 9, blank) -> NaN.
        MARITAL_STATUS = line[305:306]
        if MARITAL_STATUS == "1":
            MARITAL_STATUS = "1"
        elif MARITAL_STATUS in ("2", "3", "4", "5", "6", "7"):
            MARITAL_STATUS = "5"
        else:
            MARITAL_STATUS = np.nan

        # Raw education codes (years of schooling):
        #   0 no formal education, 1-11 grades, 12 high school,
        #   13-15 some college, 16 college grad, 17 post college (17+ years),
        #   97 other, 98 DK / NA, 99 RF (refused)
        EDUCATION = line[48:50]
        # A blank fixed-width field is made of spaces and never equals "",
        # so test the stripped text; blank and 97/98/99 become NaN.  Valid
        # values are kept exactly as read.
        if EDUCATION.strip() in ("", "97", "98", "99"):
            EDUCATION = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        MARITAL_STATUS_list.append(MARITAL_STATUS)
        EDUCATION_list.append(EDUCATION)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "MARITAL_STATUS": MARITAL_STATUS_list,
    "EDUCATION": EDUCATION_list,
}
result_2010_four = pd.DataFrame(data)
# Attach the demographics to the 2010 frame, then append the wave to the
# running result.
result_2010 = pd.merge(result_2010, result_2010_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2010], axis=0)
|
|
|
|
# Fetch the 2012 data, starting with the H12PR_R file (birth year and sex).
with open("/root/r_base/HRS/2012/data/H12PR_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    BIRTH_YEAR_list = []
    SEX_list = []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # A blank fixed-width field is made of spaces and never equals "",
        # so blankness is tested on the stripped text.  Non-blank values are
        # kept exactly as read.
        BIRTH_YEAR = line[25:29]
        if not BIRTH_YEAR.strip():
            BIRTH_YEAR = np.nan

        # Sex codes: 1 = male, 2 = female; blank -> NaN.
        SEX = line[20:21]
        if not SEX.strip():
            SEX = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        BIRTH_YEAR_list.append(BIRTH_YEAR)
        SEX_list.append(SEX)
# WAVE is a scalar; pandas repeats it for every row.
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "BIRTH_YEAR": BIRTH_YEAR_list,
    "SEX": SEX_list,
    "WAVE": 2012,
}
result_2012_one = pd.DataFrame(data)
|
|
|
|
# Read the 2012 health file (H12C_R): smoking, drinking, BMI, heart problems,
# stroke and physical-activity level.
with open("/root/r_base/HRS/2012/data/H12C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    STROKE_list = []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Smoking: keep 1 (YES) / 5 (NO); everything else is missing.
        SMOKED = line[249:250]
        SMOKED = SMOKED if SMOKED in ("1", "5") else np.nan

        # Drinking raw codes: 1 YES, 3 [VOL] NEVER HAVE USED ALCOHOL, 5 NO,
        # 8 DK / NA, 9 RF.  Recode: 1 -> "1" (yes); 3 or 5 -> "5" (no);
        # anything else -> NaN.
        DRINKED = line[276:277]
        if DRINKED == "1":
            DRINKED = "1"
        elif DRINKED in ("3", "5"):
            DRINKED = "5"
        else:
            DRINKED = np.nan

        # Weight, read in pounds and converted to kilograms; blank or
        # values above 400 are treated as missing.
        WEIGH = line[287:290].strip()
        WEIGH = float(WEIGH) * 0.45359237 if WEIGH != "" and float(WEIGH) <= 400 else np.nan

        # Height in feet + inches, converted to metres.  Both fields are
        # stripped so the blank / sentinel comparisons can actually match.
        HEIGHT_FEET = line[302:303].strip()
        HEIGHT_INCHES = line[303:308].strip()
        if HEIGHT_FEET not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT_FEET) * 0.3048
            # Blank or sentinel inches: fall back to the feet component only.
            if HEIGHT_INCHES not in ("", "98"):
                HEIGHT += float(HEIGHT_INCHES) * 0.0254
            # Guard against a zero height so BMI never divides by zero.
            BMI = WEIGH / math.pow(HEIGHT, 2) if HEIGHT > 0 else np.nan
        else:
            BMI = np.nan

        # Heart problems (heart attack, coronary heart disease, angina,
        # congestive heart failure, or other heart problems).  Raw codes:
        #   1 YES, 3 DISPUTES PREVIOUS WAVE RECORD BUT NOW HAS CONDITION,
        #   4 DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION,
        #   5 NO, 8 DK / NA, 9 RF.
        # Recode: blank/8/9 -> NaN; 1 or 3 -> "1" (yes); otherwise "5" (no).
        # The field is stripped so a blank answer is missing, not "no".
        HEART_PROBL = line[82:83].strip()
        if HEART_PROBL in ("", "8", "9"):
            HEART_PROBL = np.nan
        elif HEART_PROBL in ("1", "3"):
            HEART_PROBL = "1"
        else:
            HEART_PROBL = "5"

        # Stroke.  Raw codes: 1 YES, 2 [VOL] POSSIBLE STROKE OR TIA,
        # 3 DISPUTES BUT NOW HAS CONDITION, 4 DISPUTES, DOES NOT HAVE
        # CONDITION, 5 NO, 8 DK / NA, 9 RF.
        # Recode: blank/8/9 -> NaN; 2, 4 or 5 -> "5" (no); otherwise "1" (yes).
        # The field is stripped so a blank answer is missing, not "yes".
        STROKE = line[146:147].strip()
        if STROKE in ("", "8", "9"):
            STROKE = np.nan
        elif STROKE in ("2", "4", "5"):
            STROKE = "5"
        else:
            STROKE = "1"

        # Activity frequency raw codes: 1 MORE THAN ONCE A WEEK, 2 ONCE A
        # WEEK, 3 ONCE TO THREE TIMES A MONTH, 4 HARDLY EVER OR NEVER,
        # 7 (VOL) EVERY DAY, 8 DK, 9 RF.
        # Vigorous activity: codes 1/2/7 -> "3"; codes 3/4 -> "1"; else NaN.
        VIGOROUS_PHYSICAL = line[246:247]
        if VIGOROUS_PHYSICAL in ("1", "2", "7"):
            VIGOROUS_PHYSICAL = "3"
        elif VIGOROUS_PHYSICAL in ("3", "4"):
            VIGOROUS_PHYSICAL = "1"
        else:
            VIGOROUS_PHYSICAL = np.nan
        # Moderate activity: codes 1/2/7 -> "2"; codes 3/4 -> "1"; else NaN.
        MODERATE_PHYSICAL = line[247:248]
        if MODERATE_PHYSICAL in ("1", "2", "7"):
            MODERATE_PHYSICAL = "2"
        elif MODERATE_PHYSICAL in ("3", "4"):
            MODERATE_PHYSICAL = "1"
        else:
            MODERATE_PHYSICAL = np.nan

        # Combined level: 3 vigorous, 2 moderate, 1 inactive, NaN if neither
        # question has a usable answer.
        if VIGOROUS_PHYSICAL == "3":
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif MODERATE_PHYSICAL == "2":
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif VIGOROUS_PHYSICAL == "1" or MODERATE_PHYSICAL == "1":
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2012_two = pd.DataFrame(data)
# Left-join the health variables onto the 2012 birth-year/sex frame.
result_2012 = pd.merge(result_2012_one, result_2012_two, on=["HHID", "PN"], how="left")
|
|
|
|
# Read the 2012 demographics file (H12B_R): marital status and education.
with open("/root/r_base/HRS/2012/data/H12B_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    MARITAL_STATUS_list = []
    EDUCATION_list = []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Raw marital-status codes:
        #   1. MARRIED (VOL)        2. ANNULLED (VOL)   3. SEPARATED
        #   4. DIVORCED             5. WIDOWED          6. NEVER MARRIED
        #   7. OTHER (SPECIFY)      8. DK / NA          9. RF (refused)
        # Recode: raw "1" -> "1"; raw "2".."7" -> "5" (other); anything else
        # (8, 9, blank) -> NaN.
        MARITAL_STATUS = line[294:295]
        if MARITAL_STATUS == "1":
            MARITAL_STATUS = "1"
        elif MARITAL_STATUS in ("2", "3", "4", "5", "6", "7"):
            MARITAL_STATUS = "5"
        else:
            MARITAL_STATUS = np.nan

        # Raw education codes (years of schooling):
        #   0 no formal education, 1-11 grades, 12 high school,
        #   13-15 some college, 16 college grad, 17 post college (17+ years),
        #   97 other, 98 DK / NA, 99 RF (refused)
        EDUCATION = line[48:50]
        # A blank fixed-width field is made of spaces and never equals "",
        # so test the stripped text; blank and 97/98/99 become NaN.  Valid
        # values are kept exactly as read.
        if EDUCATION.strip() in ("", "97", "98", "99"):
            EDUCATION = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        MARITAL_STATUS_list.append(MARITAL_STATUS)
        EDUCATION_list.append(EDUCATION)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "MARITAL_STATUS": MARITAL_STATUS_list,
    "EDUCATION": EDUCATION_list,
}
result_2012_four = pd.DataFrame(data)
# Attach the demographics to the 2012 frame, then append the wave to the
# running result.
result_2012 = pd.merge(result_2012, result_2012_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2012], axis=0)
|
|
|
|
# Fetch the 2014 data, starting with the H14PR_R file (birth year and sex).
with open("/root/r_base/HRS/2014/data/H14PR_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    BIRTH_YEAR_list = []
    SEX_list = []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # A blank fixed-width field is made of spaces and never equals "",
        # so blankness is tested on the stripped text.  Non-blank values are
        # kept exactly as read.
        BIRTH_YEAR = line[25:29]
        if not BIRTH_YEAR.strip():
            BIRTH_YEAR = np.nan

        # Sex codes: 1 = male, 2 = female; blank -> NaN.
        SEX = line[20:21]
        if not SEX.strip():
            SEX = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        BIRTH_YEAR_list.append(BIRTH_YEAR)
        SEX_list.append(SEX)
# WAVE is a scalar; pandas repeats it for every row.
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "BIRTH_YEAR": BIRTH_YEAR_list,
    "SEX": SEX_list,
    "WAVE": 2014,
}
result_2014_one = pd.DataFrame(data)
|
|
|
|
# Read the 2014 health file (H14C_R): smoking, drinking, BMI, heart problems,
# stroke and physical-activity level.
with open("/root/r_base/HRS/2014/data/H14C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    STROKE_list = []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Smoking: keep 1 (YES) / 5 (NO); everything else is missing.
        SMOKED = line[214:215]
        SMOKED = SMOKED if SMOKED in ("1", "5") else np.nan

        # Drinking raw codes: 1 YES, 3 [VOL] NEVER HAVE USED ALCOHOL, 5 NO,
        # 8 DK / NA, 9 RF.  Recode: 1 -> "1" (yes); 3 or 5 -> "5" (no);
        # anything else -> NaN.
        DRINKED = line[239:240]
        if DRINKED == "1":
            DRINKED = "1"
        elif DRINKED in ("3", "5"):
            DRINKED = "5"
        else:
            DRINKED = np.nan

        # Weight, read in pounds and converted to kilograms; blank or
        # values above 400 are treated as missing.
        WEIGH = line[250:253].strip()
        WEIGH = float(WEIGH) * 0.45359237 if WEIGH != "" and float(WEIGH) <= 400 else np.nan

        # Height in feet + inches, converted to metres.  Both fields are
        # stripped so the blank / sentinel comparisons can actually match.
        HEIGHT_FEET = line[259:260].strip()
        HEIGHT_INCHES = line[260:265].strip()
        if HEIGHT_FEET not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT_FEET) * 0.3048
            # Blank or sentinel inches: fall back to the feet component only.
            if HEIGHT_INCHES not in ("", "98"):
                HEIGHT += float(HEIGHT_INCHES) * 0.0254
            # Guard against a zero height so BMI never divides by zero.
            BMI = WEIGH / math.pow(HEIGHT, 2) if HEIGHT > 0 else np.nan
        else:
            BMI = np.nan

        # Heart problems (heart attack, coronary heart disease, angina,
        # congestive heart failure, or other heart problems).  Raw codes:
        #   1 YES, 3 DISPUTES PREVIOUS WAVE RECORD BUT NOW HAS CONDITION,
        #   4 DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION,
        #   5 NO, 8 DK / NA, 9 RF.
        # Recode: blank/8/9 -> NaN; 1 or 3 -> "1" (yes); otherwise "5" (no).
        # The field is stripped so a blank answer is missing, not "no".
        HEART_PROBL = line[66:67].strip()
        if HEART_PROBL in ("", "8", "9"):
            HEART_PROBL = np.nan
        elif HEART_PROBL in ("1", "3"):
            HEART_PROBL = "1"
        else:
            HEART_PROBL = "5"

        # Stroke.  Raw codes: 1 YES, 2 [VOL] POSSIBLE STROKE OR TIA,
        # 3 DISPUTES BUT NOW HAS CONDITION, 4 DISPUTES, DOES NOT HAVE
        # CONDITION, 5 NO, 8 DK / NA, 9 RF.
        # Recode: blank/8/9 -> NaN; 2, 4 or 5 -> "5" (no); otherwise "1" (yes).
        # The field is stripped so a blank answer is missing, not "yes".
        STROKE = line[122:123].strip()
        if STROKE in ("", "8", "9"):
            STROKE = np.nan
        elif STROKE in ("2", "4", "5"):
            STROKE = "5"
        else:
            STROKE = "1"

        # Activity frequency raw codes: 1 MORE THAN ONCE A WEEK, 2 ONCE A
        # WEEK, 3 ONCE TO THREE TIMES A MONTH, 4 HARDLY EVER OR NEVER,
        # 7 (VOL) EVERY DAY, 8 DK, 9 RF.
        # Vigorous activity: codes 1/2/7 -> "3"; codes 3/4 -> "1"; else NaN.
        VIGOROUS_PHYSICAL = line[211:212]
        if VIGOROUS_PHYSICAL in ("1", "2", "7"):
            VIGOROUS_PHYSICAL = "3"
        elif VIGOROUS_PHYSICAL in ("3", "4"):
            VIGOROUS_PHYSICAL = "1"
        else:
            VIGOROUS_PHYSICAL = np.nan
        # Moderate activity: codes 1/2/7 -> "2"; codes 3/4 -> "1"; else NaN.
        MODERATE_PHYSICAL = line[212:213]
        if MODERATE_PHYSICAL in ("1", "2", "7"):
            MODERATE_PHYSICAL = "2"
        elif MODERATE_PHYSICAL in ("3", "4"):
            MODERATE_PHYSICAL = "1"
        else:
            MODERATE_PHYSICAL = np.nan

        # Combined level: 3 vigorous, 2 moderate, 1 inactive, NaN if neither
        # question has a usable answer.
        if VIGOROUS_PHYSICAL == "3":
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif MODERATE_PHYSICAL == "2":
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif VIGOROUS_PHYSICAL == "1" or MODERATE_PHYSICAL == "1":
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2014_two = pd.DataFrame(data)
# Left-join the health variables onto the 2014 birth-year/sex frame.
result_2014 = pd.merge(result_2014_one, result_2014_two, on=["HHID", "PN"], how="left")
|
|
|
|
# Read the 2014 demographics file (H14B_R): marital status and education.
with open("/root/r_base/HRS/2014/data/H14B_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    MARITAL_STATUS_list = []
    EDUCATION_list = []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Raw marital-status codes:
        #   1. MARRIED (VOL)        2. ANNULLED (VOL)   3. SEPARATED
        #   4. DIVORCED             5. WIDOWED          6. NEVER MARRIED
        #   7. OTHER (SPECIFY)      8. DK / NA          9. RF (refused)
        # Recode: raw "1" -> "1"; raw "2".."7" -> "5" (other); anything else
        # (8, 9, blank) -> NaN.
        MARITAL_STATUS = line[274:275]
        if MARITAL_STATUS == "1":
            MARITAL_STATUS = "1"
        elif MARITAL_STATUS in ("2", "3", "4", "5", "6", "7"):
            MARITAL_STATUS = "5"
        else:
            MARITAL_STATUS = np.nan

        # Raw education codes (years of schooling):
        #   0 no formal education, 1-11 grades, 12 high school,
        #   13-15 some college, 16 college grad, 17 post college (17+ years),
        #   97 other, 98 DK / NA, 99 RF (refused)
        EDUCATION = line[39:41]
        # A blank fixed-width field is made of spaces and never equals "",
        # so test the stripped text; blank and 97/98/99 become NaN.  Valid
        # values are kept exactly as read.
        if EDUCATION.strip() in ("", "97", "98", "99"):
            EDUCATION = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        MARITAL_STATUS_list.append(MARITAL_STATUS)
        EDUCATION_list.append(EDUCATION)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "MARITAL_STATUS": MARITAL_STATUS_list,
    "EDUCATION": EDUCATION_list,
}
result_2014_four = pd.DataFrame(data)
# Attach the demographics to the 2014 frame, then append the wave to the
# running result.
result_2014 = pd.merge(result_2014, result_2014_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2014], axis=0)
|
|
|
|
# Fetch the 2016 data, starting with the H16PR_R file (birth year and sex).
with open("/root/r_base/HRS/2016/data/H16PR_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    BIRTH_YEAR_list = []
    SEX_list = []
    # Fixed-width file: one respondent record per line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # A blank fixed-width field is made of spaces and never equals "",
        # so blankness is tested on the stripped text.  Non-blank values are
        # kept exactly as read.
        BIRTH_YEAR = line[25:29]
        if not BIRTH_YEAR.strip():
            BIRTH_YEAR = np.nan

        # Sex codes: 1 = male, 2 = female; blank -> NaN.
        SEX = line[20:21]
        if not SEX.strip():
            SEX = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        BIRTH_YEAR_list.append(BIRTH_YEAR)
        SEX_list.append(SEX)
# WAVE is a scalar; pandas repeats it for every row.
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "BIRTH_YEAR": BIRTH_YEAR_list,
    "SEX": SEX_list,
    "WAVE": 2016,
}
result_2016_one = pd.DataFrame(data)
|
|
|
|
# 2016 wave: smoking, drinking, BMI, heart problems, stroke and physical
# activity read from H16C_R.da (fixed-width file, fields taken by offset).


def _to_number_2016(text):
    """Return *text* parsed as a float, or None when it is blank or non-numeric."""
    try:
        return float(text)
    except ValueError:
        return None


def _bmi_2016(pounds_text, feet_text, inches_text):
    """Return BMI in kg/m^2 from weight (lb) and height (ft + in), or NaN.

    NaN is returned when weight is blank, non-positive or above 400, or when
    feet is blank, non-positive or 8/9 (DK / refused). When inches is blank,
    negative or a sentinel (98 / 99), height falls back to feet only.
    """
    pounds = _to_number_2016(pounds_text)
    if pounds is None or not 0 < pounds <= 400:
        return np.nan
    feet = _to_number_2016(feet_text)
    if feet is None or feet <= 0 or feet >= 8:
        return np.nan
    height_m = feet * 0.3048
    inches = _to_number_2016(inches_text)
    if inches is not None and 0 <= inches < 98:
        height_m += inches * 0.0254
    return pounds * 0.45359237 / math.pow(height_m, 2)


# Activity frequency codes: 1 more than once a week, 2 once a week,
# 3 one to three times a month, 4 hardly ever or never, 7 every day,
# 8 DK, 9 refused. Codes 1/2/7 count as active, 3/4 as inactive.
vigorous_recode_2016 = {"1": "3", "2": "3", "7": "3", "3": "1", "4": "1"}
moderate_recode_2016 = {"1": "2", "2": "2", "7": "2", "3": "1", "4": "1"}
# Drinking: 1 yes, 3 never used alcohol, 5 no; 8 DK/NA and 9 refused -> NaN.
drinked_recode_2016 = {"1": "1", "3": "5", "5": "5"}

rows_2016_c = []
with open("/root/r_base/HRS/2016/data/H16C_R.da", "r", encoding="utf-8") as file:
    for line in file:
        # Smoking: only 1 (yes) and 5 (no) are kept.
        smoked = line[237:238].strip()
        smoked = smoked if smoked in ("1", "5") else np.nan

        drinked = drinked_recode_2016.get(line[262:263].strip(), np.nan)

        bmi = _bmi_2016(line[273:276], line[282:283], line[283:288])

        # Heart problems: 1 yes, 3 disputes previous record but now has the
        # condition -> "1"; 8/9/blank -> NaN; everything else -> "5".
        # Stripping makes a blank field map to NaN instead of "5".
        heart = line[67:68].strip()
        if heart in ("", "8", "9"):
            heart = np.nan
        elif heart in ("1", "3"):
            heart = "1"
        else:
            heart = "5"

        # Stroke: 2 (possible stroke/TIA), 4 and 5 -> "5"; 8/9/blank -> NaN;
        # everything else -> "1". Stripping keeps a blank from becoming "1".
        stroke = line[123:124].strip()
        if stroke in ("", "8", "9"):
            stroke = np.nan
        elif stroke in ("2", "4", "5"):
            stroke = "5"
        else:
            stroke = "1"

        # Activity level: 3 vigorous, 2 moderate, 1 inactive, NaN unknown.
        vigorous = vigorous_recode_2016.get(line[234:235].strip(), np.nan)
        moderate = moderate_recode_2016.get(line[235:236].strip(), np.nan)
        if vigorous == "3":
            activity = "3"
        elif moderate == "2":
            activity = "2"
        elif vigorous == "1" or moderate == "1":
            activity = "1"
        else:
            activity = np.nan

        rows_2016_c.append(
            {
                "HHID": line[0:6],
                "PN": line[6:9],
                "SMOKED": smoked,
                "DRINKED": drinked,
                "BMI": bmi,
                "HEART_PROBL": heart,
                "STROKE": stroke,
                "PHYSICAL_ACTIVITY_LEVEL": activity,
            }
        )

result_2016_two = pd.DataFrame(
    rows_2016_c,
    columns=[
        "HHID",
        "PN",
        "SMOKED",
        "DRINKED",
        "BMI",
        "HEART_PROBL",
        "STROKE",
        "PHYSICAL_ACTIVITY_LEVEL",
    ],
)
result_2016 = pd.merge(result_2016_one, result_2016_two, on=["HHID", "PN"], how="left")
|
|
|
|
# 2016 wave: marital status and education read from H16B_R.da (fixed-width).

# Marital-status recode. Source codes: 1 married, 2 annulled, 3 separated,
# 4 divorced, 5 widowed, 6 never married, 7 other, 8 DK/NA, 9 refused.
# Output: "1" = married, "5" = any other known status, NaN = unknown.
marital_recode_2016 = {"1": "1"}
marital_recode_2016.update(dict.fromkeys(("2", "3", "4", "5", "6", "7"), "5"))

# Education is years of schooling (0-17); 97 other, 98 DK/NA, 99 refused.
# A blank field is also treated as missing.
education_missing_2016 = {"", "97", "98", "99"}

rows_2016_b = []
with open("/root/r_base/HRS/2016/data/H16B_R.da", "r", encoding="utf-8") as file:
    for line in file:
        # Strip first so that a blank ("  ") field is recognised as missing.
        education = line[39:41].strip()
        rows_2016_b.append(
            {
                "HHID": line[0:6],
                "PN": line[6:9],
                "MARITAL_STATUS": marital_recode_2016.get(line[275:276], np.nan),
                "EDUCATION": np.nan if education in education_missing_2016 else education,
            }
        )

result_2016_four = pd.DataFrame(
    rows_2016_b, columns=["HHID", "PN", "MARITAL_STATUS", "EDUCATION"]
)
# Attach to the 2016 respondent table, then append the wave to the running result.
result_2016 = pd.merge(result_2016, result_2016_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2016], axis=0)
|
|
|
|
# 2018 wave: birth year and sex read from H18PR_R.da (fixed-width file).
rows_2018_pr = []
with open("/root/r_base/HRS/2018/data/H18PR_R.da", "r", encoding="utf-8") as file:
    for line in file:
        # Strip before the emptiness test: a blank fixed-width field consists
        # of spaces, so comparing the raw slice with "" never detects it.
        birth_year = line[25:29].strip()
        sex = line[20:21].strip()  # 1 = male, 2 = female
        rows_2018_pr.append(
            {
                "HHID": line[0:6],
                "PN": line[6:9],
                "BIRTH_YEAR": birth_year if birth_year else np.nan,
                "SEX": sex if sex else np.nan,
            }
        )

result_2018_one = pd.DataFrame(rows_2018_pr, columns=["HHID", "PN", "BIRTH_YEAR", "SEX"])
# Tag every row with the survey wave it came from.
result_2018_one["WAVE"] = 2018
|
|
|
|
# 2018 wave: smoking, drinking, BMI, heart problems, stroke and physical
# activity read from H18C_R.da (fixed-width file, fields taken by offset).
# Several fields are two characters wide in this wave (the code list includes
# "-8"), so each slice is stripped before it is compared with a code;
# otherwise a padded value such as " 1" would never equal "1".


def _to_number_2018(text):
    """Return *text* parsed as a float, or None when it is blank or non-numeric."""
    try:
        return float(text)
    except ValueError:
        return None


def _bmi_2018(pounds_text, feet_text, inches_text):
    """Return BMI in kg/m^2 from weight (lb) and height (ft + in), or NaN.

    NaN is returned when weight is blank, non-positive or above 400, or when
    feet is blank, non-positive (e.g. -8) or 8 and above (DK / refused).
    When inches is blank, negative or a sentinel (98 / 99), height falls back
    to feet only.
    """
    pounds = _to_number_2018(pounds_text)
    if pounds is None or not 0 < pounds <= 400:
        return np.nan
    feet = _to_number_2018(feet_text)
    if feet is None or feet <= 0 or feet >= 8:
        return np.nan
    height_m = feet * 0.3048
    inches = _to_number_2018(inches_text)
    if inches is not None and 0 <= inches < 98:
        height_m += inches * 0.0254
    return pounds * 0.45359237 / math.pow(height_m, 2)


# Activity frequency codes: 1 more than once a week, 2 once a week,
# 3 one to three times a month, 4 hardly ever or never, 7 every day,
# 8 DK, 9 refused. Codes 1/2/7 count as active, 3/4 as inactive.
vigorous_recode_2018 = {"1": "3", "2": "3", "7": "3", "3": "1", "4": "1"}
moderate_recode_2018 = {"1": "2", "2": "2", "7": "2", "3": "1", "4": "1"}
# Drinking: 1 yes, 3 never used alcohol, 5 no; 8 DK/NA and 9 refused -> NaN.
drinked_recode_2018 = {"1": "1", "3": "5", "5": "5"}
# Codes treated as missing for the condition questions (-8 = web non-response).
condition_missing_2018 = {"", "8", "9", "-8"}

rows_2018_c = []
with open("/root/r_base/HRS/2018/data/H18C_R.da", "r", encoding="utf-8") as file:
    for line in file:
        # Smoking: only 1 (yes) and 5 (no) are kept.
        # NOTE(review): this field is read as one character while the
        # neighbouring fields are two wide - confirm the offset in the codebook.
        smoked = line[381:382].strip()
        smoked = smoked if smoked in ("1", "5") else np.nan

        drinked = drinked_recode_2018.get(line[404:406].strip(), np.nan)

        bmi = _bmi_2018(line[418:421], line[428:430], line[430:435])

        # Heart problems: 1 yes -> "1"; missing codes -> NaN; everything else
        # (4 never had, 5 no, 6 had before but not now) -> "5".
        heart = line[86:88].strip()
        if heart in condition_missing_2018:
            heart = np.nan
        elif heart == "1":
            heart = "1"
        else:
            heart = "5"

        # Stroke: 2 (possible stroke/TIA), 4 and 5 -> "5"; missing codes -> NaN;
        # everything else -> "1".
        stroke = line[162:164].strip()
        if stroke in condition_missing_2018:
            stroke = np.nan
        elif stroke in ("2", "4", "5"):
            stroke = "5"
        else:
            stroke = "1"

        # Activity level: 3 vigorous, 2 moderate, 1 inactive, NaN unknown.
        vigorous = vigorous_recode_2018.get(line[367:369].strip(), np.nan)
        moderate = moderate_recode_2018.get(line[369:371].strip(), np.nan)
        if vigorous == "3":
            activity = "3"
        elif moderate == "2":
            activity = "2"
        elif vigorous == "1" or moderate == "1":
            activity = "1"
        else:
            activity = np.nan

        rows_2018_c.append(
            {
                "HHID": line[0:6],
                "PN": line[6:9],
                "SMOKED": smoked,
                "DRINKED": drinked,
                "BMI": bmi,
                "HEART_PROBL": heart,
                "STROKE": stroke,
                "PHYSICAL_ACTIVITY_LEVEL": activity,
            }
        )

result_2018_two = pd.DataFrame(
    rows_2018_c,
    columns=[
        "HHID",
        "PN",
        "SMOKED",
        "DRINKED",
        "BMI",
        "HEART_PROBL",
        "STROKE",
        "PHYSICAL_ACTIVITY_LEVEL",
    ],
)
result_2018 = pd.merge(result_2018_one, result_2018_two, on=["HHID", "PN"], how="left")
|
|
|
|
# 2018 wave: marital status and education read from H18B_R.da (fixed-width).

# Marital-status recode. Source codes: 1 married, 2 annulled, 3 separated,
# 4 divorced, 5 widowed, 6 never married, 7 other, 8 DK/NA, 9 refused.
# Output: "1" = married, "5" = any other known status, NaN = unknown.
marital_recode_2018 = {"1": "1"}
marital_recode_2018.update(dict.fromkeys(("2", "3", "4", "5", "6", "7"), "5"))

# Education is years of schooling (0-17); 97 other, 98 DK/NA, 99 refused.
# A blank field is also treated as missing.
education_missing_2018 = {"", "97", "98", "99"}

rows_2018_b = []
with open("/root/r_base/HRS/2018/data/H18B_R.da", "r", encoding="utf-8") as file:
    for line in file:
        # Strip first so that a blank ("  ") field is recognised as missing.
        education = line[41:43].strip()
        rows_2018_b.append(
            {
                "HHID": line[0:6],
                "PN": line[6:9],
                "MARITAL_STATUS": marital_recode_2018.get(line[287:288], np.nan),
                "EDUCATION": np.nan if education in education_missing_2018 else education,
            }
        )

result_2018_four = pd.DataFrame(
    rows_2018_b, columns=["HHID", "PN", "MARITAL_STATUS", "EDUCATION"]
)
# Attach to the 2018 respondent table, then append the wave to the running result.
result_2018 = pd.merge(result_2018, result_2018_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2018], axis=0)
|
|
|
|
# 2020 wave: birth year and sex read from H20PR_R.da (fixed-width file).
rows_2020_pr = []
with open("/root/r_base/HRS/2020/data/H20PR_R.da", "r", encoding="utf-8") as file:
    for line in file:
        # Strip before the emptiness test: a blank fixed-width field consists
        # of spaces, so comparing the raw slice with "" never detects it.
        birth_year = line[36:40].strip()
        sex = line[33:34].strip()  # 1 = male, 2 = female
        rows_2020_pr.append(
            {
                "HHID": line[0:6],
                "PN": line[6:9],
                "BIRTH_YEAR": birth_year if birth_year else np.nan,
                "SEX": sex if sex else np.nan,
            }
        )

result_2020_one = pd.DataFrame(rows_2020_pr, columns=["HHID", "PN", "BIRTH_YEAR", "SEX"])
# Tag every row with the survey wave it came from.
result_2020_one["WAVE"] = 2020
|
|
|
|
# 2020 wave: smoking, drinking, BMI, heart problems, stroke and physical
# activity read from H20C_R.da (fixed-width file, fields taken by offset).
# Several fields are two characters wide in this wave (the code list includes
# "-8"), so each slice is stripped before it is compared with a code;
# otherwise a padded value such as " 1" would never equal "1".


def _to_number_2020(text):
    """Return *text* parsed as a float, or None when it is blank or non-numeric."""
    try:
        return float(text)
    except ValueError:
        return None


def _bmi_2020(pounds_text, feet_text, inches_text):
    """Return BMI in kg/m^2 from weight (lb) and height (ft + in), or NaN.

    NaN is returned when weight is blank, non-positive or above 400, or when
    feet is blank, non-positive or 8 and above (DK / refused). When inches is
    blank, negative or a sentinel (98 / 99), height falls back to feet only.
    """
    pounds = _to_number_2020(pounds_text)
    if pounds is None or not 0 < pounds <= 400:
        return np.nan
    feet = _to_number_2020(feet_text)
    if feet is None or feet <= 0 or feet >= 8:
        return np.nan
    height_m = feet * 0.3048
    inches = _to_number_2020(inches_text)
    if inches is not None and 0 <= inches < 98:
        height_m += inches * 0.0254
    return pounds * 0.45359237 / math.pow(height_m, 2)


# Activity frequency codes: 1 more than once a week, 2 once a week,
# 3 one to three times a month, 4 hardly ever or never, 7 every day,
# 8 DK, 9 refused. Codes 1/2/7 count as active, 3/4 as inactive.
vigorous_recode_2020 = {"1": "3", "2": "3", "7": "3", "3": "1", "4": "1"}
moderate_recode_2020 = {"1": "2", "2": "2", "7": "2", "3": "1", "4": "1"}
# Drinking: 1 yes, 3 never used alcohol, 5 no; 8 DK/NA and 9 refused -> NaN.
drinked_recode_2020 = {"1": "1", "3": "5", "5": "5"}
# Codes treated as missing for the condition questions (-8 = web non-response).
condition_missing_2020 = {"", "8", "9", "-8"}

rows_2020_c = []
with open("/root/r_base/HRS/2020/data/H20C_R.da", "r", encoding="utf-8") as file:
    for line in file:
        # Smoking: only 1 (yes) and 5 (no) are kept.
        # NOTE(review): this field is read as one character while the
        # neighbouring fields are two wide - confirm the offset in the codebook.
        smoked = line[339:340].strip()
        smoked = smoked if smoked in ("1", "5") else np.nan

        drinked = drinked_recode_2020.get(line[363:365].strip(), np.nan)

        bmi = _bmi_2020(line[380:383], line[389:390], line[390:395])

        # Heart problems: 1 yes -> "1"; missing codes -> NaN; everything else
        # (4 never had, 5 no, 6 had before but not now) -> "5".
        heart = line[73:75].strip()
        if heart in condition_missing_2020:
            heart = np.nan
        elif heart == "1":
            heart = "1"
        else:
            heart = "5"

        # Stroke: 2 (possible stroke/TIA), 4 and 5 -> "5"; missing codes -> NaN;
        # everything else -> "1".
        stroke = line[138:140].strip()
        if stroke in condition_missing_2020:
            stroke = np.nan
        elif stroke in ("2", "4", "5"):
            stroke = "5"
        else:
            stroke = "1"

        # Activity level: 3 vigorous, 2 moderate, 1 inactive, NaN unknown.
        vigorous = vigorous_recode_2020.get(line[325:327].strip(), np.nan)
        moderate = moderate_recode_2020.get(line[327:329].strip(), np.nan)
        if vigorous == "3":
            activity = "3"
        elif moderate == "2":
            activity = "2"
        elif vigorous == "1" or moderate == "1":
            activity = "1"
        else:
            activity = np.nan

        rows_2020_c.append(
            {
                "HHID": line[0:6],
                "PN": line[6:9],
                "SMOKED": smoked,
                "DRINKED": drinked,
                "BMI": bmi,
                "HEART_PROBL": heart,
                "STROKE": stroke,
                "PHYSICAL_ACTIVITY_LEVEL": activity,
            }
        )

result_2020_two = pd.DataFrame(
    rows_2020_c,
    columns=[
        "HHID",
        "PN",
        "SMOKED",
        "DRINKED",
        "BMI",
        "HEART_PROBL",
        "STROKE",
        "PHYSICAL_ACTIVITY_LEVEL",
    ],
)
result_2020 = pd.merge(result_2020_one, result_2020_two, on=["HHID", "PN"], how="left")
|
|
|
|
# 2020 wave: marital status and education read from H20B_R.da (fixed-width),
# followed by the final export of all accumulated waves.

# Marital-status recode. Source codes: 1 married, 2 annulled, 3 separated,
# 4 divorced, 5 widowed, 6 never married, 7 other, 8 DK/NA, 9 refused.
# Output: "1" = married, "5" = any other known status, NaN = unknown.
marital_recode_2020 = {"1": "1"}
marital_recode_2020.update(dict.fromkeys(("2", "3", "4", "5", "6", "7"), "5"))

# Education is years of schooling (0-17); 97 other, 98 DK/NA, 99 refused.
# A blank field is also treated as missing.
education_missing_2020 = {"", "97", "98", "99"}

rows_2020_b = []
with open("/root/r_base/HRS/2020/data/H20B_R.da", "r", encoding="utf-8") as file:
    for line in file:
        # Strip first so that a blank ("  ") field is recognised as missing.
        education = line[40:42].strip()
        rows_2020_b.append(
            {
                "HHID": line[0:6],
                "PN": line[6:9],
                "MARITAL_STATUS": marital_recode_2020.get(line[304:305], np.nan),
                "EDUCATION": np.nan if education in education_missing_2020 else education,
            }
        )

result_2020_four = pd.DataFrame(
    rows_2020_b, columns=["HHID", "PN", "MARITAL_STATUS", "EDUCATION"]
)
# Attach to the 2020 respondent table, then append the wave to the running result.
result_2020 = pd.merge(result_2020, result_2020_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2020], axis=0)

# Write the stacked table (one row per respondent per wave) without the index.
result.to_csv("/root/r_base/HRS/result_all.csv", index=False)
|