"""Scratch script for inspecting the size of several survey data files.

Only the pandas import is active. Every snippet below is commented out and
is meant to be enabled one at a time by hand. Each snippet either loads a
single file and prints its shape (optionally counting de-duplicated ID
rows), or walks a folder of files and sums their column counts.
"""
import pandas as pd

# --- CLHLS: shape of the merged result file ---------------------------------
# data = pd.read_csv("CLHLS/clhls_1998_2018_result.csv")
# print(data.shape)

# --- HRS: shape of the merged result file -----------------------------------
# data = pd.read_csv("HRS/result_all.csv")
# print(data.shape)
# # De-duplicate on the ID columns and count the remaining rows
# unique_ids = data.drop_duplicates(subset=["HHID", "PN"])
# count_unique_ids = unique_ids.count()
# print(count_unique_ids)

# --- UKDA-5050: shape of the merged result file -----------------------------
# data = pd.read_csv("/root/r_base/UKDA-5050-stata/result_all.csv")
# print(data.shape)
# # De-duplicate on the ID column and count the remaining rows
# unique_ids = data.drop_duplicates(subset=["id"])
# count_unique_ids = unique_ids.count()
# print(count_unique_ids)

# --- UKDA-5050: shapes of the individual wave 9 Stata files -----------------
# df = pd.read_stata('/root/r_base/UKDA-5050-stata/stata/stata13_se/wave_9_elsa_data_eul_v1.dta', convert_categoricals=False)
# print(df.shape)
# df = pd.read_stata('/root/r_base/UKDA-5050-stata/stata/stata13_se/wave_9_elsa_pensiongrid_eul_v2.dta', convert_categoricals=False)
# print(df.shape)
# df = pd.read_stata('/root/r_base/UKDA-5050-stata/stata/stata13_se/wave_9_financial_derived_variables.dta', convert_categoricals=False)
# print(df.shape)
# df = pd.read_stata('/root/r_base/UKDA-5050-stata/stata/stata13_se/wave_9_ifs_derived_variables.dta', convert_categoricals=False)
# print(df.shape)

# --- NHANES 2017-2018: total column count across all .XPT files -------------
# # Folder to scan
# import glob
# import os
# folder_path = '/root/r_base/NHANES/2017-2018'
# # Collect the paths of all .XPT files
# xpt_files = glob.glob(os.path.join(folder_path, '*.XPT'))
# num = 0
# # Read and process each .XPT file
# for file_path in xpt_files:
#     try:
#         # Read the .XPT file with pandas
#         df = pd.read_sas(file_path, format='xport')
#         # Print the DataFrame's shape as a sanity check
#         print(f"Data from {file_path}:")
#         print(df.shape)
#         num += df.shape[1]
#     except Exception as e:
#         print(f"Error reading {file_path}: {e}")
# print(num)

# --- CHARLS: shape of the merged result file --------------------------------
# data = pd.read_csv("/root/r_base/CHARLS/result_all.csv")
# print(data.shape)
# # De-duplicate on the ID column and count the remaining rows
# unique_ids = data.drop_duplicates(subset=["householdID"])
# count_unique_ids = unique_ids.count()
# print(count_unique_ids)

# --- CHARLS 2018: total column count across all .dta files ------------------
# # Folder to scan
# import glob
# import os
# folder_path = '/root/r_base/CHARLS/CHARLS2018'
# # Collect the paths of all .dta files (the variable keeps its old xpt_ name)
# xpt_files = glob.glob(os.path.join(folder_path, '*.dta'))
# num = 0
# # Read and process each .dta file
# for file_path in xpt_files:
#     try:
#         # Read the .dta file with pandas
#         df = pd.read_stata(file_path)
#         # Print the DataFrame's shape as a sanity check
#         print(f"Data from {file_path}:")
#         print(df.shape)
#         num += df.shape[1]
#     except Exception as e:
#         print(f"Error reading {file_path}: {e}")
# print(num)