123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778 |
"""Collect HHID, PN, birth year and sex from four fixed-width survey files.

Each file is read line by line, the four fields are cut out by character
position, and the combined rows are de-duplicated on the (HHID, PN) key.
"""
import pandas as pd

# Respondent key columns: HHID, PN
COLUMNS = ["HHID", "PN", "BORN_YEAR", "SEX"]

# The original script echoed every raw input line to stdout while reading.
# That behaviour is kept by default; set to False to silence it.
ECHO_RAW_LINES = True

# One entry per input file, in read order: (path, {column: (start, stop)}).
# Offsets are 0-based, end-exclusive string slice bounds.
# SEX is coded 1 = Male, 2 = Female.
SOURCES = (
    # Respondents who joined in 1992.
    # NOTE(review): the BORN_YEAR slice here is 5 characters wide while the
    # other files use 4 -- confirm against the 1992 codebook.
    (
        "/root/r_base/HRS/1992/data/health.da",
        {"HHID": (0, 6), "PN": (6, 9), "BORN_YEAR": (99, 104), "SEX": (109, 110)},
    ),
    # Respondents who joined in 1993.
    (
        "/root/r_base/HRS/1993/data/BR21.DA",
        {"HHID": (0, 6), "PN": (6, 9), "BORN_YEAR": (61, 65), "SEX": (16, 17)},
    ),
    # Respondents who joined in 1998.
    (
        "/root/r_base/HRS/1998/data/H98CS_R.DA",
        {"HHID": (0, 6), "PN": (6, 9), "BORN_YEAR": (66, 70), "SEX": (70, 71)},
    ),
    # Respondents who joined in 2004.
    (
        "/root/r_base/HRS/2004/data/H04PR_R.da",
        {"HHID": (0, 6), "PN": (6, 9), "BORN_YEAR": (25, 29), "SEX": (20, 21)},
    ),
)


def extract_fields(line, layout):
    """Return ``{column: line[start:stop]}`` for every column in *layout*.

    Values are the raw substrings; no stripping or type conversion is done.
    """
    return {name: line[start:stop] for name, (start, stop) in layout.items()}


def read_records(lines, layout, echo=False):
    """Parse an iterable of fixed-width text lines into a list of field dicts.

    Args:
        lines: Any iterable of strings (an open text file works).
        layout: Mapping of column name to ``(start, stop)`` slice bounds.
        echo: When true, print each raw line as it is read.

    Returns:
        One dict per non-blank line, in input order.
    """
    records = []
    for line in lines:
        if echo:
            print(line, end='')  # the line keeps its own newline
        # A blank line (typically a trailing newline at end of file) would
        # otherwise become a record with a meaningless key.
        if not line.strip():
            continue
        records.append(extract_fields(line, layout))
    return records


def build_frame(records):
    """Build a DataFrame with the fixed ``COLUMNS`` order from field dicts."""
    return pd.DataFrame(records, columns=COLUMNS)


def drop_duplicate_respondents(frame):
    """Keep one row per (HHID, PN), preferring the last occurrence.

    Because files are read in the order given by ``SOURCES``, a respondent
    present in several files keeps the values from the latest file read.
    """
    return frame.drop_duplicates(subset=["HHID", "PN"], keep="last")


def main():
    """Read every source file, report the row counts, and de-duplicate."""
    records = []
    for path, layout in SOURCES:
        with open(path, "r", encoding="utf-8") as file:
            records.extend(read_records(file, layout, echo=ECHO_RAW_LINES))

    result = build_frame(records)
    # DataFrame.info() prints its summary itself and returns None, so it must
    # not be wrapped in print() (that would emit an extra "None" line).
    result.info()

    # De-duplicate on the respondent key.
    result = drop_duplicate_respondents(result)
    result.info()
    return result


if __name__ == "__main__":
    main()
|