# CHARLS_exit.py (1.1 KB)

import pandas as pd
import pyreadstat
  3. if __name__ == "__main__":
  4. #读取CHARLS数据
  5. CHARLS_data = pd.read_csv("CHARLS_data_pollutants_mete.csv")
  6. CHARLS_data.to_csv("CHARLS_data_pollutants_exit.csv",index=False)
  7. CHARLS_data = pd.read_csv("CHARLS_data_pollutants_exit.csv")
  8. #增加一列死亡状态
  9. #0:未死亡
  10. #1:死亡
  11. #读取2013年的死亡数据
  12. exit, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2013/Exit_Interview.dta")
  13. exit['ID'] = pd.to_numeric(exit['ID'], errors='coerce').astype('Int64')
  14. exit["exit_year"] = exit["exb001_1"]
  15. CHARLS_data = pd.merge(CHARLS_data, exit[['ID', "exit_year"]], on = "ID", how="left")
  16. #读取2020年的死亡数据
  17. exit, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2020/Exit_Module.dta")
  18. exit['ID'] = pd.to_numeric(exit['ID'], errors='coerce').astype('Int64')
  19. exit["exit_year"] = exit["exb001_1"]
  20. CHARLS_data = pd.merge(CHARLS_data, exit[['ID', "exit_year"]], on = "ID", how="left")
  21. CHARLS_data.to_csv("CHARLS_data_pollutants_exit.csv",index=False)