123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- import pandas as pd
- from glob import glob
- year = "2011"
- path = "/root/r_base/CHARLS/CHARLS"
- if __name__ == "__main__":
- year = "2011"
- files = glob(path+"2011/*.dta")
- var_2011 = []
- for file_name in files:
- data = pd.read_stata(file_name)
- var_2011 += data.columns.to_list()
- year = "2013"
- files = glob(path+"2013/*.dta")
- var_2013 = []
- for file_name in files:
- data = pd.read_stata(file_name)
- var_2013 += data.columns.to_list()
- #获取2013新增变量
- var_2011 = set(var_2011)
- result_2013 = [elem for elem in var_2013 if elem not in var_2011]
- with open("2013.csv", "w") as f2013:
- f2013.write('\n'.join(result_2013) + '\n')
- year = "2015"
- files = glob(path+"2015/*.dta")
- var_2015 = []
- for file_name in files:
- data = pd.read_stata(file_name)
- var_2015 += data.columns.to_list()
- #获取2015新增变量
- var_2013 = set(var_2013)
- result_2015 = [elem for elem in var_2015 if elem not in var_2013]
- with open("2015.csv", "w") as f2015:
- f2015.write('\n'.join(result_2015) + '\n')
- year = "2018"
- files = glob(path+"2018/*.dta")
- var_2018 = []
- for file_name in files:
- data = pd.read_stata(file_name)
- var_2018 += data.columns.to_list()
- #获取2018新增变量
- var_2015 = set(var_2015)
- result_2018 = [elem for elem in var_2018 if elem not in var_2015]
- with open("2018.csv", "w") as f2018:
- f2018.write('\n'.join(result_2018) + '\n')
- year = "2020"
- files = glob(path+"2020/*.dta")
- var_2020 = []
- for file_name in files:
- data = pd.read_stata(file_name)
- var_2020 += data.columns.to_list()
- #获取2020新增变量
- var_2018 = set(var_2018)
- result_2020 = [elem for elem in var_2020 if elem not in var_2018]
- with open("2020.csv", "w") as f2020:
- f2020.write('\n'.join(result_2020) + '\n')
|