# different.py
  1. import pandas as pd
  2. from glob import glob
  3. year = "2011"
  4. path = "/root/r_base/CHARLS/CHARLS"
  5. if __name__ == "__main__":
  6. year = "2011"
  7. files = glob(path+"2011/*.dta")
  8. var_2011 = []
  9. for file_name in files:
  10. data = pd.read_stata(file_name)
  11. var_2011 += data.columns.to_list()
  12. year = "2013"
  13. files = glob(path+"2013/*.dta")
  14. var_2013 = []
  15. for file_name in files:
  16. data = pd.read_stata(file_name)
  17. var_2013 += data.columns.to_list()
  18. #获取2013新增变量
  19. var_2011 = set(var_2011)
  20. result_2013 = [elem for elem in var_2013 if elem not in var_2011]
  21. with open("2013.csv", "w") as f2013:
  22. f2013.write('\n'.join(result_2013) + '\n')
  23. year = "2015"
  24. files = glob(path+"2015/*.dta")
  25. var_2015 = []
  26. for file_name in files:
  27. data = pd.read_stata(file_name)
  28. var_2015 += data.columns.to_list()
  29. #获取2015新增变量
  30. var_2013 = set(var_2013)
  31. result_2015 = [elem for elem in var_2015 if elem not in var_2013]
  32. with open("2015.csv", "w") as f2015:
  33. f2015.write('\n'.join(result_2015) + '\n')
  34. year = "2018"
  35. files = glob(path+"2018/*.dta")
  36. var_2018 = []
  37. for file_name in files:
  38. data = pd.read_stata(file_name)
  39. var_2018 += data.columns.to_list()
  40. #获取2018新增变量
  41. var_2015 = set(var_2015)
  42. result_2018 = [elem for elem in var_2018 if elem not in var_2015]
  43. with open("2018.csv", "w") as f2018:
  44. f2018.write('\n'.join(result_2018) + '\n')
  45. year = "2020"
  46. files = glob(path+"2020/*.dta")
  47. var_2020 = []
  48. for file_name in files:
  49. data = pd.read_stata(file_name)
  50. var_2020 += data.columns.to_list()
  51. #获取2020新增变量
  52. var_2018 = set(var_2018)
  53. result_2020 = [elem for elem in var_2020 if elem not in var_2018]
  54. with open("2020.csv", "w") as f2020:
  55. f2020.write('\n'.join(result_2020) + '\n')