"""Score the CHARLS cognition questionnaire and export the result as CSV.

Reads ``Cognition.dta`` for the configured survey year, derives one score
column per item (1, 0 or NaN), sums them into ``Cognition_score`` and writes
the whole table to ``/root/r_base/CHARLS/test.csv``.
"""

import numpy as np
import pandas as pd
import pyreadstat

# ---------------------------------------------------------------------------
# Scratch exploration of other data sets, kept commented out as in the
# original file.
# ---------------------------------------------------------------------------
# data = pd.read_csv("CLHLS/clhls_1998_2018_result.csv")
# print(data.shape)

# data = pd.read_csv("HRS/result_all.csv")
# print(data.shape)
# # Drop duplicates and count the number of IDs
# unique_ids = data.drop_duplicates(subset=["HHID", "PN"])
# count_unique_ids = unique_ids.count()
# print(count_unique_ids)

# data = pd.read_csv("/root/r_base/UKDA-5050-stata/result_all.csv")
# print(data.shape)
# # Drop duplicates and count the number of IDs
# unique_ids = data.drop_duplicates(subset=["id"])
# count_unique_ids = unique_ids.count()
# print(count_unique_ids)

# df = pd.read_stata('/root/r_base/UKDA-5050-stata/stata/stata13_se/wave_9_elsa_data_eul_v1.dta', convert_categoricals=False)
# print(df.shape)
# df = pd.read_stata('/root/r_base/UKDA-5050-stata/stata/stata13_se/wave_9_elsa_pensiongrid_eul_v2.dta', convert_categoricals=False)
# print(df.shape)
# df = pd.read_stata('/root/r_base/UKDA-5050-stata/stata/stata13_se/wave_9_financial_derived_variables.dta', convert_categoricals=False)
# print(df.shape)
# df = pd.read_stata('/root/r_base/UKDA-5050-stata/stata/stata13_se/wave_9_ifs_derived_variables.dta', convert_categoricals=False)
# print(df.shape)

# Specify the folder path
# import glob
# import os
# folder_path = '/root/r_base/NHANES/2017-2018'
# # Collect the paths of all .xpt files
# xpt_files = glob.glob(os.path.join(folder_path, '*.XPT'))
# num = 0
# # Read and process every .xpt file
# for file_path in xpt_files:
#     try:
#         # Read the .xpt file with pandas
#         df = pd.read_sas(file_path, format='xport')
#         # Print the frame's shape as a sanity check
#         print(f"Data from {file_path}:")
#         print(df.shape)
#         num += df.shape[1]
#     except Exception as e:
#         print(f"Error reading {file_path}: {e}")
# print(num)

# data = pd.read_csv("/root/r_base/CHARLS/result_all_new.csv")
# print(data.shape)
# # Drop duplicates and count the number of IDs
# unique_ids = data.drop_duplicates(subset=["ID","communityID"])
# count_unique_ids = unique_ids.count()
# print(count_unique_ids)

# Specify the folder path
# import glob
# import os
# folder_path = '/root/r_base/CHARLS/CHARLS2018'
# # Collect the paths of all .dta files
# xpt_files = glob.glob(os.path.join(folder_path, '*.dta'))
# num = 0
# # Read and process every .dta file
# for file_path in xpt_files:
#     try:
#         # Read the .dta file with pandas
#         df = pd.read_stata(file_path)
#         # Print the frame's shape as a sanity check
#         print(f"Data from {file_path}:")
#         print(df.shape)
#         num += df.shape[1]
#     except Exception as e:
#         print(f"Error reading {file_path}: {e}")
# print(num)

# ---------------------------------------------------------------------------
# Cognition scoring
# ---------------------------------------------------------------------------

# Items answered with the codes 1 / 5: score column -> source column.
# The score-column names do not mirror the source names; the mapping below is
# exactly the one the script has always used.
_YES_NO_ITEMS = {
    "dc001s1_score": "dc001_w4",
    "dc001s2_score": "dc006_w4",
    "dc001s3_score": "dc003_w4",
    "dc002_score": "dc005_w4",
    "dc003_score": "dc002_w4",
}

# Subtraction items: score column -> (status column, expected answer).
# The numeric answer lives in "<status column>_1".
_SUBTRACTION_ITEMS = {
    "dc019_score": ("dc014_w4_1", 93),
    "dc020_score": ("dc014_w4_2", 86),
    "dc021_score": ("dc014_w4_3", 79),
    "dc022_score": ("dc014_w4_4", 72),
    "dc023_score": ("dc014_w4_5", 65),
}

# Word-memory blocks: (score-column prefix, source-column prefix).  Each block
# has ten items; item k is correct when its source column holds the code k.
# NOTE(review): presumably the two blocks are immediate and delayed recall --
# confirm against the CHARLS codebook.
# Code 11 (dc028_w4_s11 / dc047_w4_s11) is not scored; the original script had
# those two lines commented out.
_WORD_RECALL_BLOCKS = (
    ("dc006s", "dc028_w4_s"),
    ("dc027s", "dc047_w4_s"),
)
_WORD_RECALL_CODES = range(1, 11)


def _score_yes_no(answers):
    """Return 1.0 where *answers* == 1, 0.0 where it == 5, NaN otherwise."""
    return np.select(
        [(answers == 1).to_numpy(), (answers == 5).to_numpy()],
        [1.0, 0.0],
        default=np.nan,
    )


def _score_subtraction(status, answer, expected):
    """Score one subtraction item.

    * ``status == 97``           -> 0
    * ``status`` missing          -> 1 if ``answer == expected`` else 0
      (a missing ``answer`` therefore scores 0, as in the original lambdas)
    * any other ``status`` value  -> NaN
    """
    is_correct = (answer == expected).to_numpy().astype(float)
    return np.select(
        [(status == 97).to_numpy(), status.isna().to_numpy()],
        [0.0, is_correct],
        default=np.nan,
    )


def _score_word_recall(intro, picked, word_code):
    """Score one word item: NaN unless ``intro == 1``, else 1/0 on a code match."""
    is_match = (picked == word_code).to_numpy().astype(float)
    return np.where((intro == 1).to_numpy(), is_match, np.nan)


def _add_cognition_scores(frame):
    """Add every item score column and ``Cognition_score`` to *frame* in place.

    Columns are appended in the same order as the original script so the CSV
    layout is unchanged.  Score columns are always float (the original
    row-wise ``apply`` produced an int column when no value was missing).
    """
    component_columns = []

    for score_col, source_col in _YES_NO_ITEMS.items():
        frame[score_col] = _score_yes_no(frame[source_col])
        component_columns.append(score_col)

    for score_col, (status_col, expected) in _SUBTRACTION_ITEMS.items():
        frame[score_col] = _score_subtraction(
            frame[status_col], frame[status_col + "_1"], expected
        )
        component_columns.append(score_col)

    # Word memory
    for score_prefix, source_prefix in _WORD_RECALL_BLOCKS:
        for code in _WORD_RECALL_CODES:
            score_col = f"{score_prefix}{code}_score"
            frame[score_col] = _score_word_recall(
                frame["wr101_intro"], frame[f"{source_prefix}{code}"], code
            )
            component_columns.append(score_col)

    # Drawing
    frame["draw_score"] = _score_yes_no(frame["dc024_w4"])
    component_columns.append("draw_score")

    # skipna=False keeps the behaviour of the original "+" chain: one missing
    # component makes the total NaN.
    frame["Cognition_score"] = frame[component_columns].sum(axis=1, skipna=False)


# NOTE(review): the column names above hard-code the "_w4" suffix, so changing
# ``year`` alone will not retarget the script to another wave.
year = "2018"
cognition, meta = pyreadstat.read_dta(
    f"/root/r_base/CHARLS/CHARLS{year}/Cognition.dta"
)

# Compute the cognitive-function score.  Original note: three parts --
# telephone questionnaire 10 points, word recall 10 points, drawing 1 point.
# NOTE(review): the code sums 10 + 20 + 1 items (both ten-word blocks are
# added, not averaged), so the maximum is 31 rather than 21 -- confirm which
# is intended.
_add_cognition_scores(cognition)

cognition.to_csv("/root/r_base/CHARLS/test.csv")