|
@@ -0,0 +1,78 @@
|
|
|
+import pandas as pd
|
|
|
+
|
|
|
+
|
|
|
+# HHID, PN
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Output columns, in the order they appear in the resulting DataFrame.
_COLUMNS = ("HHID", "PN", "BORN_YEAR", "SEX")

# One entry per source file: (path, {column: (start, stop)}), where
# start/stop are the Python slice bounds applied to each raw line.
# SEX is coded 1 = Male, 2 = Female.
# NOTE(review): the 1992 BORN_YEAR slice is 5 characters wide while every
# other file uses 4 -- offsets are kept as-is; confirm against the codebook.
_WAVE_SOURCES = (
    # Joined in 1992
    (
        "/root/r_base/HRS/1992/data/health.da",
        {"HHID": (0, 6), "PN": (6, 9), "BORN_YEAR": (99, 104), "SEX": (109, 110)},
    ),
    # Joined in 1993
    (
        "/root/r_base/HRS/1993/data/BR21.DA",
        {"HHID": (0, 6), "PN": (6, 9), "BORN_YEAR": (61, 65), "SEX": (16, 17)},
    ),
    # Joined in 1998
    (
        "/root/r_base/HRS/1998/data/H98CS_R.DA",
        {"HHID": (0, 6), "PN": (6, 9), "BORN_YEAR": (66, 70), "SEX": (70, 71)},
    ),
    # Joined in 2004
    (
        "/root/r_base/HRS/2004/data/H04PR_R.da",
        {"HHID": (0, 6), "PN": (6, 9), "BORN_YEAR": (25, 29), "SEX": (20, 21)},
    ),
)


def _read_fixed_width(path, layout, echo=False):
    """Read one fixed-width text file and slice each line into fields.

    Args:
        path: Path of the file to read (opened as UTF-8 text).
        layout: Mapping of column name to ``(start, stop)`` slice bounds.
        echo: When true, print every raw line to stdout as it is read.

    Returns:
        A list with one ``{column: substring}`` dict per non-blank line.
        Values are the raw substrings; no stripping or type conversion
        is applied.
    """
    records = []
    with open(path, "r", encoding="utf-8") as file:
        for line in file:
            if echo:
                # end="" because the line already carries its own newline.
                print(line, end="")
            if not line.strip():
                # A blank line would otherwise become a record with an
                # empty HHID/PN and take part in de-duplication.
                continue
            records.append(
                {column: line[start:stop] for column, (start, stop) in layout.items()}
            )
    return records


def main(sources=_WAVE_SOURCES, echo=True):
    """Collect birth year and sex for every person across the source files.

    Reads each file in ``sources`` in order, builds a DataFrame with the
    columns HHID, PN, BORN_YEAR and SEX, prints its summary, removes
    duplicate (HHID, PN) pairs keeping the last occurrence (i.e. the one
    from the latest file in ``sources``), and prints the summary again.

    Args:
        sources: Iterable of ``(path, layout)`` pairs; see ``_WAVE_SOURCES``.
        echo: When true, print every raw input line while reading.

    Returns:
        The de-duplicated ``pandas.DataFrame``.
    """
    rows = []
    for path, layout in sources:
        rows.extend(_read_fixed_width(path, layout, echo=echo))

    result = pd.DataFrame(rows, columns=list(_COLUMNS))
    # DataFrame.info() prints its own report and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None").
    result.info()

    # De-duplicate on the person key; later files win.
    result = result.drop_duplicates(subset=["HHID", "PN"], keep="last")
    result.info()
    return result


if __name__ == "__main__":
    main()
|
|
|
+
|
|
|
+
|