12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263 |
- import pandas as pd
- import numpy as np
- import pyreadstat
- from datetime import date
- from lunarcalendar import Converter, Lunar
def change_columns(df):
    """Overwrite the column labels of ``df`` in place with a fixed list.

    The labels are assigned purely by position, so ``df`` must already hold
    exactly 85 columns in the order listed below; pandas raises
    ``ValueError`` on a length mismatch. Nothing is returned.

    NOTE(review): several labels are misspelled (``Disabetes_...``,
    ``Kidney_Diease``). They are reproduced verbatim because other code
    selects columns by these exact strings.
    """
    identity_cols = [
        "ID", "householdID", "communityID", "rgender",
        "birth_year", "birth_month", "ba003", "iyear", "imonth",
        "marital_status", "education",
        "province", "city", "urban_nbs",
    ]
    measurement_cols = [
        "Height", "Weight", "waist", "Systolic", "Diastolic",
        "Sit_Stand_5x", "Walking_Speed_Time",
    ]
    blood_cols = [
        "bl_wbc", "bl_mcv", "bl_plt", "bl_bun", "bl_glu", "bl_crea",
        "bl_cho", "bl_tg", "bl_hdl", "bl_ldl", "bl_crp",
        "bl_hbalc", "bl_ua", "bl_hct", "bl_hgb", "bl_cysc",
    ]
    disease_cols = [
        "Hypertension", "Dyslipidemia", "Disabetes_or_High_Blood_Sugar",
        "Cancer_or_Malignant_Tumor", "Chronic_Lung_Diseases",
        "Liver_Disease", "Heart_Problems", "Stroke", "Kidney_Diease",
        "Stomach_or_Other_Digestive_Disease",
        "Emotional_Nervous_or_Psychiatric_Problems",
        "Memory_Related_Disease", "Arthritis_or_Rheumatism", "Asthma",
    ]
    lifestyle_cols = [
        "Physical_activity",
        "Smoke", "Drink", "Accident_Or_Injury", "Fell_In_Last2Years",
        "Wear_Glasses",
        "Average_Sleep_Hours", "Average_Nap_Minutes",
        "Vigorous_Activity_10Min", "Moderate_Effort_10Min",
        "Walking_10Min",
        "Vigorous_Activity_Days", "Moderate_Effort_Days", "Walking_Days",
    ]
    social_cols = [
        "Interacted_With_Friends", "Played_Ma_jong", "Provided_help",
        "Sport", "Community_Related_Organization", "Charity_work",
        "Training_course", "Other", "None",
        "Internet_Usage_LastMonth", "Drink_PastYear",
    ]
    score_cols = ["Cognition_score", "Psychiatric_score", "sleep_state", "ADL"]
    housing_cols = [
        "Gas_Connection", "Heating_Facility", "Heating_Energy",
        "Cooking_Fuel",
    ]

    # Concatenation order defines the positional mapping onto df's columns.
    df.columns = (
        identity_cols
        + measurement_cols
        + blood_cols
        + disease_cols
        + lifestyle_cols
        + social_cols
        + score_cols
        + housing_cols
        + ["wave"]
    )
def process_row(row):
    """Combine the codes stored under 'da003_12_' and 'da003_13_' into one.

    ``row`` is anything indexable by those two keys (e.g. a DataFrame row).

    Returns:
        1      if either value equals 1;
        2      if at least one value equals 2 and the other is 2 or missing;
        np.nan in every other case (both missing, or any other code).

    NOTE(review): presumably 1 means "yes" and 2 means "no" -- confirm
    against the questionnaire codebook.
    """
    first = row['da003_12_']
    second = row['da003_13_']

    # A single 1 wins regardless of the other answer.
    if first == 1 or second == 1:
        return 1

    if first == 2:
        # The partner must agree (2) or be missing; anything else is invalid.
        if second == 2 or pd.isna(second):
            return 2
        return np.nan

    if pd.isna(first) and second == 2:
        return 2

    return np.nan
-
def update_da051(value):
    """Swap the codes 1 and 3; return any other value unchanged.

    NOTE(review): the name suggests this recodes the ``da051`` item --
    confirm at the call site.
    """
    recoded = value
    if value == 1:
        recoded = 3
    elif value == 3:
        recoded = 1
    return recoded
-
- if __name__ == "__main__":
-
- year = "2011"
- demo, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/demographic_background.dta")
- psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/psu.dta", encoding='gbk')
- biomarkers, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/biomarkers.dta")
- blood, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Blood_20140429.dta")
- health_status, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/health_status_and_functioning.dta")
- health_care, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/health_care_and_insurance.dta")
- exp_income, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/exp_income_wealth.dta")
- weight, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/weight.dta")
- houseing, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/housing_characteristics.dta")
-
-
-
- demo["marital_status"] = demo.apply(lambda x : 1 if x["be001"]==1 or x["be001"]==2 or x["be002"]==1 else 0 if x["be001"] in [3,4,5,6] else np.nan, axis=1)
-
-
-
-
-
- demo["education"] = demo["bd001"].apply(lambda x : 1 if x == 6 or x == 7 else 2 if x in [8, 9, 10, 11] else 0 if x in [1,2,3,4,5] else np.nan)
-
- demo = pd.merge(demo, weight[["ID", "iyear", "imonth"]], on = "ID", how="left")
- data_2011 = demo[['ID','householdID', 'communityID','rgender','ba002_1', 'ba002_2','ba003',"iyear", "imonth" ,'marital_status', 'education']]
-
-
-
- data_2011 = pd.merge(data_2011, psu[['communityID', 'province', 'city', 'urban_nbs']], on = "communityID", how="left")
-
- biomarkers["qi002"] = biomarkers["qi002"].apply(lambda x : np.nan if x >210 else x)
- biomarkers["ql002"] = biomarkers["ql002"].apply(lambda x : np.nan if x >150 else x)
-
- biomarkers['waist'] = biomarkers["qm002"].apply(lambda x : np.nan if x >210 else x)
-
- biomarkers["qa007"] = biomarkers["qa007"].apply(lambda x : np.nan if x >300 else x)
- biomarkers["qa011"] = biomarkers["qa011"].apply(lambda x : np.nan if x >300 else x)
- biomarkers["qa008"] = biomarkers["qa008"].apply(lambda x : np.nan if x >150 else x)
- biomarkers["qa012"] = biomarkers["qa012"].apply(lambda x : np.nan if x >150 else x)
- biomarkers["Systolic"] = (biomarkers["qa007"] + biomarkers["qa011"]) /2
- biomarkers["Diastolic"] = (biomarkers["qa008"] + biomarkers["qa012"]) /2
-
-
-
- biomarkers["Sit_Stand_5x"] = biomarkers["qh002"].apply(lambda x : 1 if x == 1 else 0 if x == 5 else np.nan)
-
- biomarkers["Walking_Speed_Time"] = (biomarkers["qg002"] + biomarkers["qg003"]) /2
-
- biomarkers_select = biomarkers[['ID','householdID', 'communityID','qi002','ql002', "waist",'Systolic','Diastolic', "Sit_Stand_5x", "Walking_Speed_Time"]]
- data_2011 = pd.merge(data_2011, biomarkers_select, on = ["ID", "householdID", "communityID"], how="left")
-
-
- blood = blood.loc[:, blood.columns.difference(["bloodweight", "qc1_va003"])]
- data_2011 = pd.merge(data_2011, blood, on = ["ID"], how="left")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- health_status["Physical_activity"] = health_status.apply(lambda x : 2 if x["da051_1_"]==1 else
- 1 if x["da051_2_"]==1 else
- 0 if x["da051_3_"] == 1 or (x["da051_1_"]==2 and x["da051_2_"]==2 and x["da051_3_"] == 2)
- else np.nan ,axis=1)
-
-
-
- health_status["Smoke"] = health_status["da059"].apply(lambda x : 1 if x ==1 else 0 if x == 2 else np.nan)
-
-
-
- health_status["Drink"] = health_status.apply(lambda x : 1 if x["da067"] ==1 or x["da067"] ==2 else
- 0 if x["da069"] == 1 else
- 1 if x["da069"] == 2 or x["da069"] == 3 else np.nan, axis=1)
-
-
-
-
- health_status['Accident_Or_Injury']=health_status["da021"].apply(lambda x : 1 if x ==1 else
- 0 if x == 2 else np.nan)
-
-
-
- health_status['Fell_In_Last2Years']=health_status["da023"].apply(lambda x : 1 if x ==1 else
- 0 if x == 2 else np.nan)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- health_status['Wear_Glasses']=health_status["da032"].apply(lambda x : 1 if x == 1 else 2 if x ==2 else 0 if x == 3 else np.nan)
-
- health_status['Average_Sleep_Hours']=health_status["da049"]
-
- health_status['Average_Nap_Minutes']=health_status["da050"]
-
- health_status['Vigorous_Activity_10Min']=health_status["da051_1_"].apply(lambda x : 1 if x == 1 else 0 if x ==2 else np.nan)
-
- health_status['Moderate_Effort_10Min']=health_status["da051_2_"].apply(lambda x : 1 if x == 1 else 0 if x ==2 else np.nan)
-
- health_status['Walking_10Min']=health_status["da051_3_"].apply(lambda x : 1 if x == 1 else 0 if x ==2 else np.nan)
-
- health_status['Vigorous_Activity_Days']=health_status.apply(lambda x : np.nan if pd.isna(x["Vigorous_Activity_10Min"]) else 0 if pd.isna(x["da052_1_"]) else x["da052_1_"], axis=1)
-
- health_status['Moderate_Effort_Days']=health_status.apply(lambda x : np.nan if pd.isna(x["Moderate_Effort_10Min"]) else 0 if pd.isna(x["da052_2_"]) else x["da052_2_"], axis=1)
-
- health_status['Walking_Days']=health_status.apply(lambda x : np.nan if pd.isna(x["Walking_10Min"]) else 0 if pd.isna(x["da052_3_"]) else x["da052_3_"], axis=1)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- health_status["da056s1"] = health_status.apply(lambda x: 1 if x["da056s1"]==1 else 0, axis=1)
- health_status["da056s2"] = health_status.apply(lambda x: 1 if x["da056s2"]==2 else 0, axis=1)
- health_status["da056s3"] = health_status.apply(lambda x: 1 if x["da056s3"]==3 else 0, axis=1)
- health_status["da056s4"] = health_status.apply(lambda x: 1 if x["da056s4"]==4 else 0, axis=1)
- health_status["da056s5"] = health_status.apply(lambda x: 1 if x["da056s5"]==5 else 0, axis=1)
- health_status["da056s6"] = health_status.apply(lambda x: 1 if x["da056s6"]==6 or x["da056s7"]==7 else 0, axis=1)
- health_status["da056s7"] = health_status.apply(lambda x: 1 if x["da056s8"]==8 else 0, axis=1)
- health_status["da056s8"] = health_status.apply(lambda x: 1 if x["da056s9"]==9 or x["da056s10"]==10 or x["da056s11"]==11 else 0, axis=1)
- health_status["da056s9"] = health_status.apply(lambda x: 1 if x["da056s12"]==12 else 0, axis=1)
-
-
-
-
-
-
-
-
-
-
-
-
-
- health_status["Internet_Usage_LastMonth"] = health_status["da056s10"].apply(lambda x : 1 if x==10 else 0)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- health_status['Drink_PastYear']=health_status["da067"]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- health_status_select = health_status[['ID','householdID', 'communityID', 'da007_1_', 'da007_2_','da007_3_'
- ,'da007_4_','da007_5_','da007_6_','da007_7_','da007_8_','da007_9_','da007_10_','da007_11_'
- ,'da007_12_','da007_13_','da007_14_', "Physical_activity", "Smoke", "Drink"
- , "Accident_Or_Injury", "Fell_In_Last2Years", "Wear_Glasses"
- , "Average_Sleep_Hours", "Average_Nap_Minutes", "Vigorous_Activity_10Min", "Moderate_Effort_10Min"
- , "Walking_10Min", "Vigorous_Activity_Days", "Moderate_Effort_Days", "Walking_Days"
- , "da056s1", "da056s2", "da056s3", "da056s4", "da056s5", "da056s6", "da056s7", "da056s8", "da056s9"
- , "Internet_Usage_LastMonth", "Drink_PastYear"]]
-
- data_2011 = pd.merge(data_2011, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
-
-
-
- data_2011[['Heart_attack_2_years']]=np.nan
-
-
-
- data_2011[['Recurrent_Stroke']]=np.nan
-
- health_status["dc001s1_score"] = health_status["dc001s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
- health_status["dc001s2_score"] = health_status["dc001s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
- health_status["dc001s3_score"] = health_status["dc001s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
- health_status["dc002_score"] = health_status["dc002"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
-
- health_status["dc019_score"] = health_status["dc019"].apply(lambda x : 1 if x==93 else 0 if pd.isna(x) else 0)
- health_status["dc020_score"] = health_status["dc020"].apply(lambda x : 1 if x==86 else 0 if pd.isna(x) else 0)
- health_status["dc021_score"] = health_status["dc021"].apply(lambda x : 1 if x==79 else 0 if pd.isna(x) else 0)
- health_status["dc022_score"] = health_status["dc022"].apply(lambda x : 1 if x==72 else 0 if pd.isna(x) else 0)
- health_status["dc023_score"] = health_status["dc023"].apply(lambda x : 1 if x==65 else 0 if pd.isna(x) else 0)
-
- health_status["dc006s1_score"] = health_status["dc006s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
- health_status["dc006s2_score"] = health_status["dc006s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
- health_status["dc006s3_score"] = health_status["dc006s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
- health_status["dc006s4_score"] = health_status["dc006s4"].apply(lambda x : 1 if x==4 else 0 if pd.isna(x) else 0)
- health_status["dc006s5_score"] = health_status["dc006s5"].apply(lambda x : 1 if x==5 else 0 if pd.isna(x) else 0)
- health_status["dc006s6_score"] = health_status["dc006s6"].apply(lambda x : 1 if x==6 else 0 if pd.isna(x) else 0)
- health_status["dc006s7_score"] = health_status["dc006s7"].apply(lambda x : 1 if x==7 else 0 if pd.isna(x) else 0)
- health_status["dc006s8_score"] = health_status["dc006s8"].apply(lambda x : 1 if x==8 else 0 if pd.isna(x) else 0)
- health_status["dc006s9_score"] = health_status["dc006s9"].apply(lambda x : 1 if x==9 else 0 if pd.isna(x) else 0)
- health_status["dc006s10_score"] = health_status["dc006s10"].apply(lambda x : 1 if x==10 else 0 if pd.isna(x) else 0)
-
- health_status["dc027s1_score"] = health_status["dc027s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
- health_status["dc027s2_score"] = health_status["dc027s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
- health_status["dc027s3_score"] = health_status["dc027s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
- health_status["dc027s4_score"] = health_status["dc027s4"].apply(lambda x : 1 if x==4 else 0 if pd.isna(x) else 0)
- health_status["dc027s5_score"] = health_status["dc027s5"].apply(lambda x : 1 if x==5 else 0 if pd.isna(x) else 0)
- health_status["dc027s6_score"] = health_status["dc027s6"].apply(lambda x : 1 if x==6 else 0 if pd.isna(x) else 0)
- health_status["dc027s7_score"] = health_status["dc027s7"].apply(lambda x : 1 if x==7 else 0 if pd.isna(x) else 0)
- health_status["dc027s8_score"] = health_status["dc027s8"].apply(lambda x : 1 if x==8 else 0 if pd.isna(x) else 0)
- health_status["dc027s9_score"] = health_status["dc027s9"].apply(lambda x : 1 if x==9 else 0 if pd.isna(x) else 0)
- health_status["dc027s10_score"] = health_status["dc027s10"].apply(lambda x : 1 if x==10 else 0 if pd.isna(x) else 0)
-
-
- health_status["draw_score"] = health_status["dc025"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
- data_2011["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
- health_status["dc001s3_score"] + health_status["dc002_score"]+ \
- health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
- health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
- health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \
- health_status["dc006s5_score"] + health_status["dc006s6_score"] + health_status["dc006s7_score"] + \
- health_status["dc006s8_score"] + health_status["dc006s9_score"] + health_status["dc006s10_score"] + \
- health_status["dc027s1_score"]+ health_status["dc027s2_score"]+ \
- health_status["dc027s3_score"]+ health_status["dc027s4_score"]+ health_status["dc027s5_score"]+ \
- health_status["dc027s6_score"]+ health_status["dc027s7_score"]+ health_status["dc027s8_score"]+ \
- health_status["dc027s9_score"]+health_status["dc027s10_score"]+\
- health_status["draw_score"]
-
- health_status["dc009_score"] = health_status["dc009"]-1
- health_status["dc010_score"] = health_status["dc010"]-1
- health_status["dc011_score"] = health_status["dc011"]-1
- health_status["dc012_score"] = health_status["dc012"]-1
- health_status["dc013_score"] = 4 - health_status["dc013"]
- health_status["dc014_score"] = health_status["dc014"]-1
- health_status["dc015_score"] = health_status["dc015"]-1
- health_status["dc016_score"] = 4 - health_status["dc016"]
- health_status["dc017_score"] = health_status["dc017"]-1
- health_status["dc018_score"] = health_status["dc018"]-1
- data_2011["psychiatric_score"] = health_status["dc009_score"] + health_status["dc010_score"] + health_status["dc011_score"] + \
- health_status["dc012_score"] + health_status["dc013_score"] + health_status["dc014_score"] + health_status["dc015_score"] + \
- health_status["dc016_score"] + health_status["dc017_score"] + health_status["dc018_score"]
-
-
-
-
-
- data_2011["sleep_state"] = health_status['dc015']
-
- health_status["db010_score"] = health_status["db010"].apply(lambda x : 0 if x==1 else 1 if x >= 2 else np.nan)
- health_status["db011_score"] = health_status["db011"].apply(lambda x : 0 if x==1 else 1 if x >= 2 else np.nan)
- health_status["db012_score"] = health_status["db012"].apply(lambda x : 0 if x==1 else 1 if x >= 2 else np.nan)
- health_status["db013_score"] = health_status["db013"].apply(lambda x : 0 if x==1 else 1 if x >= 2 else np.nan)
- health_status["db014_score"] = health_status["db014"].apply(lambda x : 0 if x==1 else 1 if x >= 2 else np.nan)
- health_status["db015_score"] = health_status["db015"].apply(lambda x : 0 if x==1 else 1 if x >= 2 else np.nan)
- data_2011["ADL"] = health_status["db010_score"] + health_status["db011_score"] + health_status["db012_score"] + health_status["db013_score"] + \
- health_status["db014_score"] + health_status["db015_score"]
-
- houseing["Gas_Connection"] = houseing["i019"].apply(lambda x : 1 if x ==1 else 0 if x == 2 else np.nan)
-
- houseing["Heating_Facility"] = houseing["i020"].apply(lambda x : 1 if x ==1 else 0 if x == 2 else np.nan)
-
-
-
-
-
-
-
-
- houseing["Heating_Energy"] = houseing["i021"].apply(lambda x : 0 if x ==7 else x if not pd.isna(x) else np.nan)
-
-
-
-
-
-
-
-
- houseing["Cooking_Fuel"] = houseing["i022"].apply(lambda x : 0 if x ==7 else x if not pd.isna(x) else np.nan)
- houseing_select = houseing[['householdID', 'communityID','Gas_Connection',
- 'Heating_Facility', 'Heating_Energy', 'Cooking_Fuel']]
- data_2011 = pd.merge(data_2011, houseing_select, on = ['householdID', 'communityID'], how="left")
- data_2011["wave"] = year
- change_columns(data_2011)
-
- data_2011["ID"] = data_2011["ID"].apply(lambda x : x[:-2] + '0' + x[-2:] if len(str(x)) >= 3 else x)
- print("2011 complete")
-
- year = "2013"
- demo, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Demographic_Background.dta")
- psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/PSU.dta", encoding='gbk')
- biomarkers, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Biomarker.dta")
- health_status, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Status_and_Functioning.dta")
- health_care, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Care_and_Insurance.dta")
- exp_income, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/exp_income_wealth.dta")
- weight, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Weights.dta")
- houseing, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Housing_Characteristics.dta")
-
-
-
-
- demo["marital_status"] = demo.apply(lambda x : 1 if x["be001"]==1 or x["be001"]==2 or x["be001"]==7 else 0 if x["be001"] in [3,4,5,6] else np.nan, axis=1)
-
-
-
-
-
- demo["education_correct"] = demo.apply(lambda x : x["bd001_w2_3"] if x["bd001_w2_1"]==2 else np.nan, axis=1)
- demo["education_correct"] = demo["education_correct"].apply(lambda x : 1 if x == 6 or x == 7 else 2 if x in [8, 9, 10, 11] else 0 if x in [1,2,3,4,5] else np.nan)
- education_correct = demo[['ID',"education_correct"]]
-
- data_2011 = pd.merge(data_2011, education_correct, on='ID', how='left')
-
- data_2011['education'] = data_2011['education_correct'].fillna(data_2011['education'])
-
- data_2011 = data_2011.drop(columns=['education_correct'])
-
- demo["education"] = demo.apply(lambda x : x["bd001"] if pd.isna(x["bd001_w2_1"]) else x["bd001_w2_4"] if not pd.isna(x["bd001_w2_4"]) and not x["bd001_w2_4"]==12 else np.nan, axis=1)
- demo["education"] = demo["education"].apply(lambda x : 1 if x == 6 or x == 7 else 2 if x in [8, 9, 10, 11] else 0 if x in [1,2,3,4,5] else np.nan)
-
- eductaion_2011 = data_2011[['ID',"education"]]
-
- demo = pd.merge(demo, eductaion_2011, on='ID', how='left', suffixes=("_2013","_2011"))
-
- demo['education'] = demo['education_2013'].fillna(demo['education_2011'])
-
- demo["birth_year"] = demo.apply(lambda x : x["ba002_1"] if not pd.isna(x["ba002_1"]) else np.nan, axis=1)
- demo["birth_month"] = demo.apply(lambda x : x["ba002_2"] if not pd.isna(x["ba002_2"]) else np.nan, axis=1)
- birth_year_2013 = demo[['ID',"birth_year", "birth_month"]]
-
- data_2011 = pd.merge(data_2011, birth_year_2013, on='ID', how='left', suffixes=("_2011","_2013"))
-
- data_2011['birth_year'] = data_2011['birth_year_2013'].fillna(data_2011['birth_year_2011'])
- data_2011['birth_month'] = data_2011['birth_month_2013'].fillna(data_2011['birth_month_2011'])
-
- data_2011 = data_2011.drop(columns=['birth_year_2013', 'birth_year_2011', 'birth_month_2013', 'birth_month_2011'])
-
- birth_year_2011 = data_2011[['ID',"birth_year", "birth_month"]]
-
- demo = pd.merge(demo, birth_year_2011, on='ID', how='left', suffixes=("_2013","_2011"))
-
- demo['birth_year'] = demo['birth_year_2013'].fillna(demo['birth_year_2011'])
- demo['birth_month'] = demo['birth_month_2013'].fillna(demo['birth_month_2011'])
-
- demo = pd.merge(demo, weight[["ID", "iyear", "imonth"]], on = "ID", how="left")
- data_2013 = demo[['ID','householdID', 'communityID','ba000_w2_3','birth_year','birth_month','ba003',"iyear", "imonth", 'marital_status', "education"]]
-
-
-
- data_2013 = pd.merge(data_2013, psu[['communityID', 'province', 'city', 'urban_nbs']], on = "communityID", how="left")
-
- biomarkers["qi002"] = biomarkers["qi002"].apply(lambda x : np.nan if x >210 else x)
- biomarkers["ql002"] = biomarkers["ql002"].apply(lambda x : np.nan if x >150 else x)
-
- biomarkers['waist'] = biomarkers["qm002"].apply(lambda x : np.nan if x >210 else x)
-
- biomarkers["qa007"] = biomarkers["qa007"].apply(lambda x : np.nan if x >300 else x)
- biomarkers["qa011"] = biomarkers["qa011"].apply(lambda x : np.nan if x >300 else x)
- biomarkers["qa008"] = biomarkers["qa008"].apply(lambda x : np.nan if x >150 else x)
- biomarkers["qa012"] = biomarkers["qa012"].apply(lambda x : np.nan if x >150 else x)
- biomarkers["Systolic"] = (biomarkers["qa007"] + biomarkers["qa011"]) /2
- biomarkers["Diastolic"] = (biomarkers["qa008"] + biomarkers["qa012"]) /2
-
-
-
- biomarkers["Sit_Stand_5x"] = biomarkers["qh002"].apply(lambda x : 1 if x == 1 else 0 if x == 5 else np.nan)
-
- biomarkers["Walking_Speed_Time"] = (biomarkers["qg002"] + biomarkers["qg003"]) /2
- biomarkers_select = biomarkers[['ID','householdID', 'communityID','qi002','ql002', 'waist','Systolic','Diastolic', "Sit_Stand_5x", "Walking_Speed_Time"]]
- data_2013 = pd.merge(data_2013, biomarkers_select, on = ["ID", "householdID", "communityID"], how="left")
-
-
- data_2013[['bl_wbc','bl_mcv','bl_plt','bl_bun','bl_glu','bl_crea','bl_cho', 'bl_tg', 'bl_hdl', 'bl_ldl','bl_crp','bl_hbalc','bl_ua', 'bl_hct', 'bl_hgb','bl_cysc']]=np.nan
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- health_status["Physical_activity"] = health_status.apply(lambda x : 2 if x["da051_1_"]==1 else
- 1 if x["da051_2_"]==1 else
- 0 if x["da051_3_"] == 1 or (x["da051_1_"]==2 and x["da051_2_"]==2 and x["da051_3_"] == 2)
- else np.nan ,axis=1)
-
-
-
-
- health_status["Smoke"] = health_status["da059"].apply(lambda x : 1 if x ==1 else 0 if x == 2 else 1)
-
-
-
- health_status["Drink"] = health_status.apply(lambda x : 1 if x["da067"] ==1 or x["da067"] ==2 else
- 0 if x["da069"] == 1 else
- 1 if x["da069"] == 2 or x["da069"] == 3 else np.nan, axis=1)
-
-
-
- health_status['Accident_Or_Injury']=health_status["da021"].apply(lambda x : 1 if x ==1 else
- 0 if x == 2 else np.nan)
-
-
-
- health_status['Fell_In_Last2Years']=health_status["da023"].apply(lambda x : 1 if x ==1 else
- 0 if x == 2 else np.nan)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- health_status['Wear_Glasses']=health_status["da032"].apply(lambda x : 1 if x == 1 else 2 if x ==2 else 0 if x == 3 else 3 if x == 4 else np.nan)
-
- health_status['Average_Sleep_Hours']=health_status["da049"]
-
- health_status['Average_Nap_Minutes']=health_status["da050"]
-
- health_status['Vigorous_Activity_10Min']=health_status["da051_1_"].apply(lambda x : 1 if x == 1 else 0 if x ==2 else np.nan)
-
- health_status['Moderate_Effort_10Min']=health_status["da051_2_"].apply(lambda x : 1 if x == 1 else 0 if x ==2 else np.nan)
-
- health_status['Walking_10Min']=health_status["da051_3_"].apply(lambda x : 1 if x == 1 else 0 if x ==2 else np.nan)
-
- health_status['Vigorous_Activity_Days']=health_status.apply(lambda x : np.nan if pd.isna(x["Vigorous_Activity_10Min"]) else 0 if pd.isna(x["da052_1_"]) else x["da052_1_"], axis=1)
-
- health_status['Moderate_Effort_Days']=health_status.apply(lambda x : np.nan if pd.isna(x["Moderate_Effort_10Min"]) else 0 if pd.isna(x["da052_2_"]) else x["da052_2_"], axis=1)
-
- health_status['Walking_Days']=health_status.apply(lambda x : np.nan if pd.isna(x["Walking_10Min"]) else 0 if pd.isna(x["da052_3_"]) else x["da052_3_"], axis=1)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# Collapse the twelve da056s* option columns into nine 0/1 indicators that
# reuse the names da056s1..da056s9.  A missing answer counts as 0.
# Statement order matters: every line reads only columns that have not been
# overwritten yet (e.g. da056s7 is read before it is reassigned).
for option in range(1, 6):
    column = f"da056s{option}"
    health_status[column] = (health_status[column] == option).astype("int64")

health_status["da056s6"] = (
    (health_status["da056s6"] == 6) | (health_status["da056s7"] == 7)
).astype("int64")
health_status["da056s7"] = (health_status["da056s8"] == 8).astype("int64")
health_status["da056s8"] = (
    (health_status["da056s9"] == 9)
    | (health_status["da056s10"] == 10)
    | (health_status["da056s11"] == 11)
).astype("int64")
health_status["da056s9"] = (health_status["da056s12"] == 12).astype("int64")

# da056s10 is never overwritten above, so this still reads the raw answer.
health_status["Internet_Usage_LastMonth"] = (
    health_status["da056s10"] == 10
).astype("int64")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# da067 is copied through unchanged.
health_status["Drink_PastYear"] = health_status["da067"]

# Both follow-up items are recoded 1 -> 1, 2 -> 0, anything else -> NaN.
health_status["Heart_attack_2_years"] = health_status["da007_w2_5"].map({1: 1, 2: 0})
health_status["Recurrent_Stroke"] = health_status["da019_w2_1"].map({1: 1, 2: 0})
-
-
# Raw item columns da007_1_ .. da007_14_ and the readable names they receive.
columns_to_diseases_old = [f"da007_{number}_" for number in range(1, 15)]
columns_to_diseases_new = [
    'Hypertension', 'Dyslipidemia', 'Disabetes_or_High_Blood_Sugar',
    'Cancer_or_Malignant_Tumor', 'Chronic_Lung_Diseases',
    'Liver_Disease', 'Heart_Problems', 'Stroke', 'Kidney_Diease',
    'Stomach_or_Other_Digestive_Disease',
    'Emotional_Nervous_or_Psychiatric_Problems', 'Memory_Related_Disease',
    'Arthritis_or_Rheumatism', 'Asthma',
]
for col_old, col_new in zip(columns_to_diseases_old, columns_to_diseases_new):
    # Copy the raw answer; missing answers are stored as NaN.
    health_status[col_new] = health_status[col_old].where(
        health_status[col_old].notna(), np.nan
    )

# Bring in the same columns from data_2011 so that answers missing in this
# wave can be filled from there.
diseases_2011 = data_2011[['ID'] + columns_to_diseases_new]
health_status = pd.merge(
    health_status, diseases_2011, on='ID', how='left', suffixes=("_2013", "_2011")
)
for col in columns_to_diseases_new:
    health_status[col] = health_status[f'{col}_2013'].fillna(health_status[f'{col}_2011'])

# Derived health columns that are attached to the wave table.
health_status_select = health_status[
    ['ID', 'householdID', 'communityID']
    + columns_to_diseases_new
    + [
        "Physical_activity", "Smoke", "Drink", "Accident_Or_Injury",
        "Fell_In_Last2Years", "Wear_Glasses",
        "Average_Sleep_Hours", "Average_Nap_Minutes",
        "Vigorous_Activity_10Min", "Moderate_Effort_10Min", "Walking_10Min",
        "Vigorous_Activity_Days", "Moderate_Effort_Days", "Walking_Days",
        "da056s1", "da056s2", "da056s3", "da056s4", "da056s5",
        "da056s6", "da056s7", "da056s8", "da056s9",
        "Internet_Usage_LastMonth", "Drink_PastYear",
    ]
]

data_2013 = pd.merge(
    data_2013, health_status_select,
    on=["ID", 'householdID', 'communityID'], how="left",
)
-
# Each cognition item scores 1 when the recorded answer equals the expected
# code and 0 otherwise (a missing answer also scores 0).
# Entries are (source column, score column, expected code).
cognition_items = [
    ("dc001s1", "dc001s1_score", 1),
    ("dc001s2", "dc001s2_score", 2),
    ("dc001s3", "dc001s3_score", 3),
    ("dc002", "dc002_score", 1),
    ("dc019", "dc019_score", 93),
    ("dc020", "dc020_score", 86),
    ("dc021", "dc021_score", 79),
    ("dc022", "dc022_score", 72),
    ("dc023", "dc023_score", 65),
]
cognition_items += [(f"dc006_1_s{k}", f"dc006s{k}_score", k) for k in range(1, 11)]
cognition_items += [(f"dc027s{k}", f"dc027s{k}_score", k) for k in range(1, 11)]

for source, score, expected in cognition_items:
    health_status[score] = (health_status[source] == expected).astype("int64")

# dc025: 1 -> 1, 2 -> 0, otherwise NaN.  A NaN here makes the total NaN.
health_status["draw_score"] = health_status["dc025"].map({1: 1, 2: 0})

score_columns = [score for _, score, _ in cognition_items] + ["draw_score"]
cognition_total = health_status[score_columns].sum(axis=1, skipna=False)

# Attach the total by respondent ID.  Assigning the Series directly would
# align on the positional row index, which only matches if data_2013 and
# health_status happen to list respondents in exactly the same order.
cognition_by_id = (
    health_status[["ID"]]
    .assign(total=cognition_total)
    .drop_duplicates("ID")
    .set_index("ID")["total"]
)
data_2013["Cognition_score"] = data_2013["ID"].map(cognition_by_id)
-
# Items dc009..dc018 are rescaled to start at 0; dc013 and dc016 are
# reverse-scored (4 - answer), the rest are scored as (answer - 1).
reverse_scored = ("dc013", "dc016")
mood_items = [f"dc{number:03d}" for number in range(9, 19)]
for item in mood_items:
    if item in reverse_scored:
        health_status[f"{item}_score"] = 4 - health_status[item]
    else:
        health_status[f"{item}_score"] = health_status[item] - 1

# Any missing item makes the total NaN.
mood_total = health_status[[f"{item}_score" for item in mood_items]].sum(
    axis=1, skipna=False
)

# Attach by respondent ID rather than by positional row index, which is only
# correct when both frames list respondents in the same order.
mood_by_id = (
    health_status[["ID", "dc015"]]
    .assign(total=mood_total)
    .drop_duplicates("ID")
    .set_index("ID")
)
data_2013["psychiatric_score"] = data_2013["ID"].map(mood_by_id["total"])

# The raw dc015 answer is also kept on its own.
data_2013["sleep_state"] = data_2013["ID"].map(mood_by_id["dc015"])
-
# Items db010..db015: answer 1 scores 0, any answer >= 2 scores 1, everything
# else (including missing) is NaN.
adl_items = [f"db{number:03d}" for number in range(10, 16)]
for item in adl_items:
    answer = health_status[item]
    health_status[f"{item}_score"] = np.select(
        [answer == 1, answer >= 2], [0, 1], default=np.nan
    )

# Any missing item makes the total NaN.
adl_total = health_status[[f"{item}_score" for item in adl_items]].sum(
    axis=1, skipna=False
)

# Attach by respondent ID rather than by positional row index, which is only
# correct when both frames list respondents in the same order.
adl_by_id = (
    health_status[["ID"]]
    .assign(total=adl_total)
    .drop_duplicates("ID")
    .set_index("ID")["total"]
)
data_2013["ADL"] = data_2013["ID"].map(adl_by_id)
-
-
# i019 / i020: 1 -> 1, 2 -> 0, anything else -> NaN.
houseing["Gas_Connection"] = houseing["i019"].map({1: 1, 2: 0})
houseing["Heating_Facility"] = houseing["i020"].map({1: 1, 2: 0})

# i021 / i022: code 7 is folded into 0; other codes and missing values are
# kept as they are.
houseing["Heating_Energy"] = houseing["i021"].mask(houseing["i021"] == 7, 0)
houseing["Cooking_Fuel"] = houseing["i022"].mask(houseing["i022"] == 7, 0)

houseing_select = houseing[
    ['ID', 'householdID', 'communityID',
     'Gas_Connection', 'Heating_Facility', 'Heating_Energy', 'Cooking_Fuel']
]
data_2013 = pd.merge(
    data_2013, houseing_select,
    on=["ID", 'householdID', 'communityID'], how="left",
)
-
# Tag the rows with their wave, then normalise the column names through
# change_columns (defined elsewhere in this file; called for its effect on
# the frame).
data_2013["wave"] = year
change_columns(data_2013)

# From here on data_2013 holds the 2011 rows followed by the 2013 rows.
# The original row labels are kept, so index labels repeat across waves.
data_2013 = pd.concat([data_2011, data_2013], axis=0)
print("2013 complete")
-
year = "2015"
# Directory that holds the .dta files of this wave.
wave_dir = "/root/r_base/CHARLS/CHARLS" + year

# pyreadstat.read_dta returns (DataFrame, metadata); `meta` is rebound by each
# call, so after this block it describes the housing file.
demo, meta = pyreadstat.read_dta(wave_dir + "/Demographic_Background.dta")
# The PSU table is read from the 2013 directory for this wave as well.
psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2013/PSU.dta", encoding='gbk')
blood, meta = pyreadstat.read_dta(wave_dir + "/Blood.dta")
biomarkers, meta = pyreadstat.read_dta(wave_dir + "/Biomarker.dta")
health_status, meta = pyreadstat.read_dta(wave_dir + "/Health_Status_and_Functioning.dta")
health_care, meta = pyreadstat.read_dta(wave_dir + "/Health_Care_and_Insurance.dta")
weight, meta = pyreadstat.read_dta(wave_dir + "/Sample_Infor.dta")
houseing, meta = pyreadstat.read_dta(wave_dir + "/Housing_Characteristics.dta")
-
-
-
-
# be001: codes 1, 2 and 7 -> 1; codes 3-6 -> 0; anything else -> NaN.
demo["marital_status"] = demo["be001"].map(
    {1: 1, 2: 1, 7: 1, 3: 0, 4: 0, 5: 0, 6: 0}
)

# bd001_w2_4 is collapsed into three levels: codes 1-5 -> 0, 6-7 -> 1,
# 8-11 -> 2.  Every other code (including 12) and missing answers are NaN.
education_levels = {code: 0 for code in (1, 2, 3, 4, 5)}
education_levels.update({6: 1, 7: 1})
education_levels.update({code: 2 for code in (8, 9, 10, 11)})
demo["education"] = demo["bd001_w2_4"].map(education_levels)

# Fill answers missing in this wave from the 2013 rows of the combined table.
eductaion_2013 = data_2013[data_2013["wave"] == "2013"][['ID', "education"]]
demo = pd.merge(demo, eductaion_2013, on='ID', how='left', suffixes=("_2015", "_2013"))
demo['education'] = demo['education_2015'].fillna(demo['education_2013'])

# ba002 selects which pair of columns supplies the birth date:
# 1 -> ba004_w3_1 / ba004_w3_2, 2 -> ba002_1 / ba002_2, otherwise NaN.
use_w3 = demo["ba002"] == 1
use_ba002 = demo["ba002"] == 2
demo["birth_year"] = demo["ba004_w3_1"].where(use_w3, demo["ba002_1"].where(use_ba002))
demo["birth_month"] = demo["ba004_w3_2"].where(use_w3, demo["ba002_2"].where(use_ba002))

# Interview year/month come from the sample-information file.
demo = pd.merge(demo, weight[["ID", "iyear", "imonth"]], on="ID", how="left")
data_2015 = demo[
    ['ID', 'householdID', 'communityID', 'ba000_w2_3', 'birth_year', 'birth_month',
     'ba003', "iyear", "imonth", 'marital_status', 'education']
]
-
-
-
data_2015 = pd.merge(
    data_2015, psu[['communityID', 'province', 'city', 'urban_nbs']],
    on="communityID", how="left",
)

# Readings above the listed cut-off are replaced by NaN.
# Entries are (source column, destination column, cut-off).
measurement_limits = [
    ("qi002", "qi002", 210),
    ("ql002", "ql002", 150),
    ("qm002", "waist", 210),
    ("qa007", "qa007", 300),
    ("qa011", "qa011", 300),
    ("qa008", "qa008", 150),
    ("qa012", "qa012", 150),
]
for source, target, limit in measurement_limits:
    biomarkers[target] = biomarkers[source].mask(biomarkers[source] > limit)

# Blood pressure is the mean of the two retained readings; the mean is NaN
# when either reading is missing or was discarded above.
biomarkers["Systolic"] = (biomarkers["qa007"] + biomarkers["qa011"]) / 2
biomarkers["Diastolic"] = (biomarkers["qa008"] + biomarkers["qa012"]) / 2

# qh002: 1 -> 1, 5 -> 0, anything else -> NaN.
biomarkers["Sit_Stand_5x"] = biomarkers["qh002"].map({1: 1, 5: 0})

# Mean of the two timed walks.
biomarkers["Walking_Speed_Time"] = (biomarkers["qg002"] + biomarkers["qg003"]) / 2

biomarkers_select = biomarkers[
    ['ID', 'householdID', 'communityID', 'qi002', 'ql002', 'waist',
     'Systolic', 'Diastolic', "Sit_Stand_5x", "Walking_Speed_Time"]
]
data_2015 = pd.merge(
    data_2015, biomarkers_select,
    on=["ID", "householdID", "communityID"], how="left",
)

# Keep only the blood measures that are carried into the wave table.
blood = blood[
    ['ID', 'bl_wbc', 'bl_mcv', 'bl_plt', 'bl_bun', 'bl_glu', 'bl_crea', 'bl_cho',
     'bl_tg', 'bl_hdl', 'bl_ldl', 'bl_crp', 'bl_hbalc', 'bl_ua', 'bl_hct',
     'bl_hgb', 'bl_cysc']
]
data_2015 = pd.merge(data_2015, blood, on=["ID"], how="left")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# Physical_activity takes the highest intensity reported:
# 2 when da051_1_ == 1, else 1 when da051_2_ == 1, else 0 when da051_3_ == 1
# or all three items were answered 2; NaN otherwise.
vigorous = health_status["da051_1_"]
moderate = health_status["da051_2_"]
walking = health_status["da051_3_"]
all_answered_no = (vigorous == 2) & (moderate == 2) & (walking == 2)
health_status["Physical_activity"] = np.select(
    [vigorous == 1, moderate == 1, (walking == 1) | all_answered_no],
    [2, 1, 0],
    default=np.nan,
)

# da059: 2 -> 0, every other value -> 1.
# NOTE(review): a missing da059 is therefore coded 1 as well; confirm that
# this is intended.
health_status["Smoke"] = np.where(health_status["da059"] == 2, 0, 1)

# Drink: 1 when da067 is 1 or 2; otherwise decided by da069
# (1 -> 0, 2 or 3 -> 1); NaN when neither item gives an answer.
health_status["Drink"] = np.select(
    [
        health_status["da067"].isin([1, 2]),
        health_status["da069"] == 1,
        health_status["da069"].isin([2, 3]),
    ],
    [1, 0, 1],
    default=np.nan,
)

# da021 / da023: 1 -> 1, 2 -> 0, anything else -> NaN.
health_status["Accident_Or_Injury"] = health_status["da021"].map({1: 1, 2: 0})
health_status["Fell_In_Last2Years"] = health_status["da023"].map({1: 1, 2: 0})
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# da032: answers 1 and 2 are kept, 3 becomes 0, 4 becomes 3, anything else NaN.
health_status["Wear_Glasses"] = health_status["da032"].map({1: 1, 2: 2, 3: 0, 4: 3})

# Sleep and nap durations are copied through unchanged.
health_status["Average_Sleep_Hours"] = health_status["da049"]
health_status["Average_Nap_Minutes"] = health_status["da050"]

# (output column prefix, item number inside the da051_/da052_ column names).
activity_items = [("Vigorous_Activity", 1), ("Moderate_Effort", 2), ("Walking", 3)]

# 10-minute flags: 1 -> 1, 2 -> 0, anything else -> NaN.
for prefix, item in activity_items:
    health_status[f"{prefix}_10Min"] = health_status[f"da051_{item}_"].map({1: 1, 2: 0})

# Day counts: NaN when the 10-minute flag is unknown, 0 when the flag is known
# but no day count was recorded, otherwise the recorded day count.
for prefix, item in activity_items:
    flag_known = health_status[f"{prefix}_10Min"].notna()
    health_status[f"{prefix}_Days"] = (
        health_status[f"da052_{item}_"].fillna(0).where(flag_known)
    )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# Collapse the twelve da056s* option columns into nine 0/1 indicators that
# reuse the names da056s1..da056s9.  A missing answer counts as 0.
# Statement order matters: every line reads only columns that have not been
# overwritten yet (e.g. da056s7 is read before it is reassigned).
for option in range(1, 6):
    column = f"da056s{option}"
    health_status[column] = (health_status[column] == option).astype("int64")

health_status["da056s6"] = (
    (health_status["da056s6"] == 6) | (health_status["da056s7"] == 7)
).astype("int64")
health_status["da056s7"] = (health_status["da056s8"] == 8).astype("int64")
health_status["da056s8"] = (
    (health_status["da056s9"] == 9)
    | (health_status["da056s10"] == 10)
    | (health_status["da056s11"] == 11)
).astype("int64")
health_status["da056s9"] = (health_status["da056s12"] == 12).astype("int64")

# da056s10 is never overwritten above, so this still reads the raw answer.
health_status["Internet_Usage_LastMonth"] = (
    health_status["da056s10"] == 10
).astype("int64")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# da067 is copied through unchanged.
health_status["Drink_PastYear"] = health_status["da067"]

# Both follow-up items are recoded 1 -> 1, 2 -> 0, anything else -> NaN.
health_status["Heart_attack_2_years"] = health_status["da007_w2_5"].map({1: 1, 2: 0})
health_status["Recurrent_Stroke"] = health_status["da019_w2_1"].map({1: 1, 2: 0})
-
# Raw item columns da007_1_ .. da007_14_ and the readable names they receive.
columns_to_diseases_old = [f"da007_{number}_" for number in range(1, 15)]
columns_to_diseases_new = [
    'Hypertension', 'Dyslipidemia', 'Disabetes_or_High_Blood_Sugar',
    'Cancer_or_Malignant_Tumor', 'Chronic_Lung_Diseases',
    'Liver_Disease', 'Heart_Problems', 'Stroke', 'Kidney_Diease',
    'Stomach_or_Other_Digestive_Disease',
    'Emotional_Nervous_or_Psychiatric_Problems', 'Memory_Related_Disease',
    'Arthritis_or_Rheumatism', 'Asthma',
]
for col_old, col_new in zip(columns_to_diseases_old, columns_to_diseases_new):
    # Copy the raw answer; missing answers are stored as NaN.
    health_status[col_new] = health_status[col_old].where(
        health_status[col_old].notna(), np.nan
    )

# Bring in the same columns from the 2013 rows of the combined table so that
# answers missing in this wave can be filled from there.
diseases_2013 = data_2013[data_2013["wave"] == "2013"][['ID'] + columns_to_diseases_new]
health_status = pd.merge(
    health_status, diseases_2013, on='ID', how='left', suffixes=("_2015", "_2013")
)
for col in columns_to_diseases_new:
    health_status[col] = health_status[f'{col}_2015'].fillna(health_status[f'{col}_2013'])

# Derived health columns that are attached to the wave table.
health_status_select = health_status[
    ['ID', 'householdID', 'communityID']
    + columns_to_diseases_new
    + [
        "Physical_activity", "Smoke", "Drink", "Accident_Or_Injury",
        "Fell_In_Last2Years", "Wear_Glasses",
        "Average_Sleep_Hours", "Average_Nap_Minutes",
        "Vigorous_Activity_10Min", "Moderate_Effort_10Min", "Walking_10Min",
        "Vigorous_Activity_Days", "Moderate_Effort_Days", "Walking_Days",
        "da056s1", "da056s2", "da056s3", "da056s4", "da056s5",
        "da056s6", "da056s7", "da056s8", "da056s9",
        "Internet_Usage_LastMonth", "Drink_PastYear",
    ]
]

data_2015 = pd.merge(
    data_2015, health_status_select,
    on=["ID", 'householdID', 'communityID'], how="left",
)
-
# Each cognition item scores 1 when the recorded answer equals the expected
# code and 0 otherwise (a missing answer also scores 0).
# Entries are (source column, score column, expected code).
cognition_items = [
    ("dc001s1", "dc001s1_score", 1),
    ("dc001s2", "dc001s2_score", 2),
    ("dc001s3", "dc001s3_score", 3),
    ("dc002", "dc002_score", 1),
    ("dc019", "dc019_score", 93),
    ("dc020", "dc020_score", 86),
    ("dc021", "dc021_score", 79),
    ("dc022", "dc022_score", 72),
    ("dc023", "dc023_score", 65),
]
cognition_items += [(f"dc006s{k}", f"dc006s{k}_score", k) for k in range(1, 11)]
cognition_items += [(f"dc027s{k}", f"dc027s{k}_score", k) for k in range(1, 11)]

for source, score, expected in cognition_items:
    health_status[score] = (health_status[source] == expected).astype("int64")

# dc025: 1 -> 1, 2 -> 0, otherwise NaN.  A NaN here makes the total NaN.
health_status["draw_score"] = health_status["dc025"].map({1: 1, 2: 0})

score_columns = [score for _, score, _ in cognition_items] + ["draw_score"]
cognition_total = health_status[score_columns].sum(axis=1, skipna=False)

# Attach the total by respondent ID.  Assigning the Series directly would
# align on the positional row index, which only matches if data_2015 and
# health_status happen to list respondents in exactly the same order.
cognition_by_id = (
    health_status[["ID"]]
    .assign(total=cognition_total)
    .drop_duplicates("ID")
    .set_index("ID")["total"]
)
data_2015["Cognition_score"] = data_2015["ID"].map(cognition_by_id)
-
# Items dc009..dc018 are rescaled to start at 0; dc013 and dc016 are
# reverse-scored (4 - answer), the rest are scored as (answer - 1).
reverse_scored = ("dc013", "dc016")
mood_items = [f"dc{number:03d}" for number in range(9, 19)]
for item in mood_items:
    if item in reverse_scored:
        health_status[f"{item}_score"] = 4 - health_status[item]
    else:
        health_status[f"{item}_score"] = health_status[item] - 1

# Any missing item makes the total NaN.
mood_total = health_status[[f"{item}_score" for item in mood_items]].sum(
    axis=1, skipna=False
)

# Attach by respondent ID rather than by positional row index, which is only
# correct when both frames list respondents in the same order.
mood_by_id = (
    health_status[["ID", "dc015"]]
    .assign(total=mood_total)
    .drop_duplicates("ID")
    .set_index("ID")
)
data_2015["psychiatric_score"] = data_2015["ID"].map(mood_by_id["total"])

# The raw dc015 answer is also kept on its own.
data_2015["sleep_state"] = data_2015["ID"].map(mood_by_id["dc015"])
-
# Items db010..db015: answer 1 scores 0, any answer >= 2 scores 1, everything
# else (including missing) is NaN.
adl_items = [f"db{number:03d}" for number in range(10, 16)]
for item in adl_items:
    answer = health_status[item]
    health_status[f"{item}_score"] = np.select(
        [answer == 1, answer >= 2], [0, 1], default=np.nan
    )

# Any missing item makes the total NaN.
adl_total = health_status[[f"{item}_score" for item in adl_items]].sum(
    axis=1, skipna=False
)

# Attach by respondent ID rather than by positional row index, which is only
# correct when both frames list respondents in the same order.
adl_by_id = (
    health_status[["ID"]]
    .assign(total=adl_total)
    .drop_duplicates("ID")
    .set_index("ID")["total"]
)
data_2015["ADL"] = data_2015["ID"].map(adl_by_id)
-
-
# i019 / i020: 1 -> 1, 2 -> 0, anything else -> NaN.
houseing["Gas_Connection"] = houseing["i019"].map({1: 1, 2: 0})
houseing["Heating_Facility"] = houseing["i020"].map({1: 1, 2: 0})

# i021 / i022: code 7 is folded into 0; other codes and missing values are
# kept as they are.
houseing["Heating_Energy"] = houseing["i021"].mask(houseing["i021"] == 7, 0)
houseing["Cooking_Fuel"] = houseing["i022"].mask(houseing["i022"] == 7, 0)

houseing_select = houseing[
    ['ID', 'householdID', 'communityID',
     'Gas_Connection', 'Heating_Facility', 'Heating_Energy', 'Cooking_Fuel']
]
data_2015 = pd.merge(
    data_2015, houseing_select,
    on=["ID", 'householdID', 'communityID'], how="left",
)
-
# Tag the rows with their wave, then normalise the column names through
# change_columns (defined elsewhere in this file; called for its effect on
# the frame).
data_2015["wave"] = year
change_columns(data_2015)

# data_2013 already holds the earlier waves, so data_2015 becomes the
# cumulative table.  The original row labels are kept, so index labels repeat
# across waves.
data_2015 = pd.concat([data_2013, data_2015], axis=0)
print("2015 complete")
-
year = "2018"
# Directory that holds the .dta files of this wave.
wave_dir = "/root/r_base/CHARLS/CHARLS" + year

# pyreadstat.read_dta returns (DataFrame, metadata); `meta` is rebound by each
# call, so after this block it describes the housing file.
demo, meta = pyreadstat.read_dta(wave_dir + "/Demographic_Background.dta")
# The PSU table is read from the 2013 directory for this wave as well.
psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2013/PSU.dta", encoding='gbk')
health_status, meta = pyreadstat.read_dta(wave_dir + "/Health_Status_and_Functioning.dta")
health_care, meta = pyreadstat.read_dta(wave_dir + "/Health_Care_and_Insurance.dta")
cognition, meta = pyreadstat.read_dta(wave_dir + "/Cognition.dta")
weight, meta = pyreadstat.read_dta(wave_dir + "/Sample_Infor.dta")
houseing, meta = pyreadstat.read_dta(wave_dir + "/Housing.dta")
-
-
-
# marital_status: 1 when be001 is 1 or 2, or when be002 == 1; otherwise 0 when
# be001 is one of 3-6; NaN in every other case.
be001 = demo["be001"]
demo["marital_status"] = np.select(
    [be001.isin([1, 2]) | (demo["be002"] == 1), be001.isin([3, 4, 5, 6])],
    [1, 0],
    default=np.nan,
)

# bd001_w2_4 is collapsed into three levels: codes 1-5 -> 0, 6-7 -> 1,
# 8-11 -> 2.  Every other code and missing answers are NaN.
education_levels = {code: 0 for code in (1, 2, 3, 4, 5)}
education_levels.update({6: 1, 7: 1})
education_levels.update({code: 2 for code in (8, 9, 10, 11)})
demo["education"] = demo["bd001_w2_4"].map(education_levels)

# ba005_w4 selects which pair of columns supplies the birth date:
# 1 -> ba004_w3_1 / ba004_w3_2, 2 -> ba002_1 / ba002_2, otherwise NaN.
use_w3 = demo["ba005_w4"] == 1
use_ba002 = demo["ba005_w4"] == 2
demo["birth_year"] = demo["ba004_w3_1"].where(use_w3, demo["ba002_1"].where(use_ba002))
demo["birth_month"] = demo["ba004_w3_2"].where(use_w3, demo["ba002_2"].where(use_ba002))

# Interview year/month come from the sample-information file.
demo = pd.merge(demo, weight[["ID", "iyear", "imonth"]], on="ID", how="left")
data_2018 = demo[
    ['ID', 'householdID', 'communityID', 'xrgender', 'birth_year', 'birth_month',
     'ba003', "iyear", "imonth", 'marital_status', 'education']
]
-
-
-
data_2018 = pd.merge(
    data_2018, psu[['communityID', 'province', 'city', 'urban_nbs']],
    on="communityID", how="left",
)

# No biomarker or blood file is loaded for this wave, so the corresponding
# columns are created as all-NaN placeholders.
# NOTE(review): the 2015 block carries 'Systolic'/'Diastolic' where this one
# creates 'qa011'/'qa012'; confirm that change_columns reconciles the names.
placeholder_columns = [
    'qi002', 'ql002', 'waist', 'qa011', 'qa012',
    "Sit_Stand_5x", "Walking_Speed_Time",
    'bl_wbc', 'bl_mcv', 'bl_plt', 'bl_bun', 'bl_glu', 'bl_crea', 'bl_cho',
    'bl_tg', 'bl_hdl', 'bl_ldl', 'bl_crp', 'bl_hbalc', 'bl_ua', 'bl_hct',
    'bl_hgb', 'bl_cysc',
]
for column in placeholder_columns:
    data_2018[column] = np.nan
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# Physical_activity takes the highest intensity reported:
# 2 when da051_1_ == 1, else 1 when da051_2_ == 1, else 0 when da051_3_ == 1
# or all three items were answered 2; NaN otherwise.
vigorous = health_status["da051_1_"]
moderate = health_status["da051_2_"]
walking = health_status["da051_3_"]
all_answered_no = (vigorous == 2) & (moderate == 2) & (walking == 2)
health_status["Physical_activity"] = np.select(
    [vigorous == 1, moderate == 1, (walking == 1) | all_answered_no],
    [2, 1, 0],
    default=np.nan,
)

# da059: 2 -> 0, every other value -> 1.
# NOTE(review): a missing da059 is therefore coded 1 as well; confirm that
# this is intended.
health_status["Smoke"] = np.where(health_status["da059"] == 2, 0, 1)

# Drink: 1 when da067 is 1 or 2; otherwise decided by da069
# (1 -> 0, 2 or 3 -> 1); NaN when neither item gives an answer.
health_status["Drink"] = np.select(
    [
        health_status["da067"].isin([1, 2]),
        health_status["da069"] == 1,
        health_status["da069"].isin([2, 3]),
    ],
    [1, 0, 1],
    default=np.nan,
)

# Accident_Or_Injury: when da021 is missing, the presence of da022 decides
# (da022 present -> 1, da022 missing -> 0).  A da021 value other than 1 or 2
# gives NaN.
da021 = health_status["da021"]
da022 = health_status["da022"]
health_status["Accident_Or_Injury"] = np.select(
    [
        (da021 == 1) | (da021.isna() & da022.notna()),
        (da021 == 2) | (da021.isna() & da022.isna()),
    ],
    [1, 0],
    default=np.nan,
)

# Fell_In_Last2Years: either da023 or da023_w4 may carry the answer; a 1 in
# either column takes precedence over a 2.
da023 = health_status["da023"]
da023_w4 = health_status["da023_w4"]
health_status["Fell_In_Last2Years"] = np.select(
    [(da023 == 1) | (da023_w4 == 1), (da023 == 2) | (da023_w4 == 2)],
    [1, 0],
    default=np.nan,
)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# da032: answers 1 and 2 are kept, 3 becomes 0, 4 becomes 3, anything else NaN.
health_status["Wear_Glasses"] = health_status["da032"].map({1: 1, 2: 2, 3: 0, 4: 3})

# Sleep and nap durations are copied through unchanged.
health_status["Average_Sleep_Hours"] = health_status["da049"]
health_status["Average_Nap_Minutes"] = health_status["da050"]

# (output column prefix, item number inside the da051_/da052_ column names).
activity_items = [("Vigorous_Activity", 1), ("Moderate_Effort", 2), ("Walking", 3)]

# 10-minute flags: 1 -> 1, 2 -> 0, anything else -> NaN.
for prefix, item in activity_items:
    health_status[f"{prefix}_10Min"] = health_status[f"da051_{item}_"].map({1: 1, 2: 0})

# Day counts: NaN when the 10-minute flag is unknown, 0 when the flag is known
# but no day count was recorded, otherwise the recorded day count.
for prefix, item in activity_items:
    flag_known = health_status[f"{prefix}_10Min"].notna()
    health_status[f"{prefix}_Days"] = (
        health_status[f"da052_{item}_"].fillna(0).where(flag_known)
    )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# ---------------------------------------------------------------------------
# 2018 wave: collapse the twelve da056_s* columns into nine 0/1 flags that
# reuse the names da056_s1..da056_s9, plus an internet-use flag.
# ---------------------------------------------------------------------------


def _any_code_ticked(pairs):
    """Make a row function returning 1 if any (column, code) pair matches, else 0."""
    def flag(row):
        return 1 if any(row[col] == code for col, code in pairs) else 0
    return flag


# The order matters: targets s6..s9 are overwritten in place, and each entry
# only reads raw columns that have not been overwritten yet.
_da056_plan = (
    ("da056_s1", (("da056_s1", 1),)),
    ("da056_s2", (("da056_s2", 2),)),
    ("da056_s3", (("da056_s3", 3),)),
    ("da056_s4", (("da056_s4", 4),)),
    ("da056_s5", (("da056_s5", 5),)),
    ("da056_s6", (("da056_s6", 6), ("da056_s7", 7))),
    ("da056_s7", (("da056_s8", 8),)),
    ("da056_s8", (("da056_s9", 9), ("da056_s10", 10), ("da056_s11", 11))),
    ("da056_s9", (("da056_s12", 12),)),
)
for target_col, source_pairs in _da056_plan:
    health_status[target_col] = health_status.apply(_any_code_ticked(source_pairs), axis=1)

# da056_s10 is never overwritten above, so this still reads the raw answer.
health_status["Internet_Usage_LastMonth"] = health_status["da056_s10"].apply(
    lambda code: 1 if code == 10 else 0
)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# ---------------------------------------------------------------------------
# 2018 wave: disease indicators, back-filled from the 2015 wave, then merged
# together with the other health variables into data_2018.
# ---------------------------------------------------------------------------


def _row_yes_no(col):
    """Make a row function mapping col: 1 -> 1, 2 -> 0, anything else -> NaN."""
    def flag(row):
        if row[col] == 1:
            return 1
        if row[col] == 2:
            return 0
        return np.nan
    return flag


def _copy_if_present(col):
    """Make a row function returning row[col], or NaN when it is missing."""
    def value(row):
        return np.nan if pd.isna(row[col]) else row[col]
    return value


health_status["Drink_PastYear"] = health_status["da067"]

# NOTE(review): these two columns are not part of health_status_select below,
# so they are not merged into data_2018 by this block -- confirm intent.
health_status["Heart_attack_2_years"] = health_status.apply(_row_yes_no("da007_w2_5"), axis=1)
health_status["Recurrent_Stroke"] = health_status.apply(_row_yes_no("da019_w2_1"), axis=1)

columns_to_diseases_old = [f"da007_{k}_" for k in range(1, 15)]
columns_to_diseases_new = [
    'Hypertension', 'Dyslipidemia', 'Disabetes_or_High_Blood_Sugar',
    'Cancer_or_Malignant_Tumor', 'Chronic_Lung_Diseases', 'Liver_Disease',
    'Heart_Problems', 'Stroke', 'Kidney_Diease',
    'Stomach_or_Other_Digestive_Disease',
    'Emotional_Nervous_or_Psychiatric_Problems', 'Memory_Related_Disease',
    'Arthritis_or_Rheumatism', 'Asthma',
]
for col_old, col_new in zip(columns_to_diseases_old, columns_to_diseases_new):
    health_status[col_new] = health_status.apply(_copy_if_present(col_old), axis=1)

# Rows of the accumulated frame that belong to the 2015 wave.
rows_2015 = data_2015[data_2015["wave"] == "2015"]
diseases_2015 = rows_2015[['ID'] + columns_to_diseases_new]

# Both frames carry the disease names, so the merge suffixes them by wave.
health_status = pd.merge(health_status, diseases_2015, on='ID', how='left', suffixes=("_2018", "_2015"))

# Prefer the 2018 value; fall back to 2015 where 2018 is missing.
for col in columns_to_diseases_new:
    health_status[col] = health_status[f'{col}_2018'].fillna(health_status[f'{col}_2015'])

_select_2018 = (
    ['ID', 'householdID', 'communityID']
    + columns_to_diseases_new
    + [
        "Physical_activity", "Smoke", "Drink", "Accident_Or_Injury",
        "Fell_In_Last2Years", "Wear_Glasses", "Average_Sleep_Hours",
        "Average_Nap_Minutes", "Vigorous_Activity_10Min",
        "Moderate_Effort_10Min", "Walking_10Min", "Vigorous_Activity_Days",
        "Moderate_Effort_Days", "Walking_Days",
    ]
    + [f"da056_s{k}" for k in range(1, 10)]
    + ["Internet_Usage_LastMonth", "Drink_PastYear"]
)
health_status_select = health_status[_select_2018]
data_2018 = pd.merge(data_2018, health_status_select, on=["ID", 'householdID', 'communityID'], how="left")
-
# ---------------------------------------------------------------------------
# 2018 wave: item scores on `cognition`, summed into
# data_2018["Cognition_score"]. Any NaN item makes the whole sum NaN.
# ---------------------------------------------------------------------------


def _one_five_flag(code):
    """Map code 1 to 1 and code 5 to 0; any other value becomes NaN."""
    if code == 1:
        return 1
    if code == 5:
        return 0
    return np.nan


def _subtraction_scorer_2018(status_col, answer_col, expected):
    """Make a row scorer for one subtraction step.

    Status 97 scores 0; a missing status scores 1 when the answer equals
    `expected` and 0 otherwise; any other status yields NaN.
    """
    def score(row):
        status = row[status_col]
        if status == 97:
            return 0
        if pd.isna(status):
            return 1 if row[answer_col] == expected else 0
        return np.nan
    return score


def _recall_scorer(gate_col, word_col, word_code):
    """Make a row scorer: NaN unless gate_col == 1, else 1/0 for word_col == word_code."""
    def score(row):
        if not row[gate_col] == 1:
            return np.nan
        return 1 if row[word_col] == word_code else 0
    return score


for score_col, raw_col in (
    ("dc001s1_score", "dc001_w4"),
    ("dc001s2_score", "dc006_w4"),
    ("dc001s3_score", "dc003_w4"),
    ("dc002_score", "dc005_w4"),
):
    cognition[score_col] = cognition[raw_col].apply(_one_five_flag)

# Expected running results 93, 86, 79, 72, 65 feed dc019_score..dc023_score.
for step, expected in enumerate((93, 86, 79, 72, 65), start=1):
    cognition[f"dc0{18 + step}_score"] = cognition.apply(
        _subtraction_scorer_2018(f"dc014_w4_{step}", f"dc014_w4_{step}_1", expected),
        axis=1,
    )

# Two ten-word recall lists, both gated on wr101_intro == 1.
for score_prefix, raw_prefix in (("dc006", "dc028_w4"), ("dc027", "dc047_w4")):
    for word in range(1, 11):
        cognition[f"{score_prefix}s{word}_score"] = cognition.apply(
            _recall_scorer("wr101_intro", f"{raw_prefix}_s{word}", word),
            axis=1,
        )

cognition["draw_score"] = cognition["dc024_w4"].apply(_one_five_flag)

_cognition_items = (
    ["dc001s1_score", "dc001s2_score", "dc001s3_score", "dc002_score"]
    + [f"dc0{n}_score" for n in range(19, 24)]
    + [f"dc006s{word}_score" for word in range(1, 11)]
    + [f"dc027s{word}_score" for word in range(1, 11)]
    + ["draw_score"]
)
_cognition_total = cognition[_cognition_items[0]]
for item in _cognition_items[1:]:
    _cognition_total = _cognition_total + cognition[item]

# NOTE(review): this assignment aligns on index labels, not on ID -- verify
# that `cognition` and `data_2018` share row order.
data_2018["Cognition_score"] = _cognition_total
-
# ---------------------------------------------------------------------------
# 2018 wave: psychiatric score (dc009..dc018), sleep_state and ADL score.
# ---------------------------------------------------------------------------


def _forward_item(value):
    """Return value - 1 for non-missing values below 5, otherwise NaN."""
    if (not pd.isna(value)) and value < 5:
        return value - 1
    return np.nan


def _reversed_item(value):
    """Return 4 - value for non-missing values below 5, otherwise NaN."""
    if (not pd.isna(value)) and value < 5:
        return 4 - value
    return np.nan


def _difficulty_flag(code):
    """Map code 1 to 0 and any code >= 2 to 1; everything else becomes NaN."""
    if code == 1:
        return 0
    if code >= 2:
        return 1
    return np.nan


def _add_up(frame, names):
    """Add the named columns left to right; NaN in any term propagates."""
    total = frame[names[0]]
    for name in names[1:]:
        total = total + frame[name]
    return total


# dc013 and dc016 are scored in the reverse direction (4 - x).
_psych_items = [f"dc{n:03d}" for n in range(9, 19)]
for item in _psych_items:
    scorer = _reversed_item if item in ("dc013", "dc016") else _forward_item
    cognition[f"{item}_score"] = cognition[item].apply(scorer)

# NOTE(review): the assignments to data_2018 below align on index labels,
# not on ID -- verify the frames share row order.
data_2018["psychiatric_score"] = _add_up(cognition, [f"{item}_score" for item in _psych_items])

# Raw dc015 answer, with values above 4 blanked out.
data_2018["sleep_state"] = cognition['dc015'].apply(lambda value: np.nan if value > 4 else value)

_adl_items = [f"db0{n}" for n in range(10, 16)]
for item in _adl_items:
    health_status[f"{item}_score"] = health_status[item].apply(_difficulty_flag)

data_2018["ADL"] = _add_up(health_status, [f"{item}_score" for item in _adl_items])
-
-
# ---------------------------------------------------------------------------
# 2018 wave: household energy variables, then close the wave by tagging it,
# running change_columns and stacking it under the earlier waves.
# ---------------------------------------------------------------------------


def _yes_no_flag(code):
    """Map code 1 to 1 and code 2 to 0; any other value becomes NaN."""
    if code == 1:
        return 1
    if code == 2:
        return 0
    return np.nan


def _heating_energy_2018(code):
    """Recode codes 7 and 8 to 0; pass every other value through unchanged."""
    return 0 if code in (7, 8) else code


def _cooking_fuel_2018(code):
    """Recode code 8 to NaN and code 7 to 0; pass every other value through."""
    if code == 8:
        return np.nan
    if code == 7:
        return 0
    return code


houseing["Gas_Connection"] = houseing["i019"].apply(_yes_no_flag)
houseing["Heating_Facility"] = houseing["i020"].apply(_yes_no_flag)
houseing["Heating_Energy"] = houseing["i021_w4"].apply(_heating_energy_2018)
houseing["Cooking_Fuel"] = houseing["i022_w4"].apply(_cooking_fuel_2018)

_housing_keys = ['householdID', 'communityID']
houseing_select = houseing[_housing_keys + ['Gas_Connection', 'Heating_Facility', 'Heating_Energy', 'Cooking_Fuel']]
data_2018 = pd.merge(data_2018, houseing_select, on=_housing_keys, how="left")

data_2018["wave"] = year
change_columns(data_2018)

# From here on data_2018 holds the earlier waves plus the 2018 rows.
data_2018 = pd.concat([data_2015, data_2018], axis=0)
print("2018 complete")
-
# ---------------------------------------------------------------------------
# 2020 wave: load the raw .dta files, then derive marital status and
# education (back-filled from the 2018 wave).
# ---------------------------------------------------------------------------
year = "2020"
_wave_dir = "/root/r_base/CHARLS/CHARLS" + year

demo, meta = pyreadstat.read_dta(_wave_dir + "/Demographic_Background.dta")
# The PSU file is always taken from the 2013 folder, whatever `year` is.
psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2013/PSU.dta", encoding='gbk')
health_status, meta = pyreadstat.read_dta(_wave_dir + "/Health_Status_and_Functioning.dta")
weight, meta = pyreadstat.read_dta(_wave_dir + "/Sample_Infor.dta")
houseing, meta = pyreadstat.read_dta(_wave_dir + "/Household_Income.dta")


def _marital_status_2020(row):
    """Return 1 if ba011 is 1 or 2 or ba012 is 1; 0 if ba011 is 3..6; else NaN."""
    if row["ba011"] == 1 or row["ba011"] == 2 or row["ba012"] == 1:
        return 1
    if row["ba011"] in [3, 4, 5, 6]:
        return 0
    return np.nan


def _raw_education_2020(row):
    """Return ba010, or NaN when it is missing."""
    return np.nan if pd.isna(row["ba010"]) else row["ba010"]


def _education_level_2020(code):
    """Recode ba010: 1..5 -> 0, 6..7 -> 1, 8..11 -> 2, anything else -> NaN."""
    if code == 6 or code == 7:
        return 1
    if code in [8, 9, 10, 11]:
        return 2
    if code in [1, 2, 3, 4, 5]:
        return 0
    return np.nan


demo["marital_status"] = demo.apply(_marital_status_2020, axis=1)

demo["education"] = demo.apply(_raw_education_2020, axis=1)
demo["education"] = demo["education"].apply(_education_level_2020)

# Bring in each respondent's 2018 education; the clash on "education"
# produces education_2020 / education_2018 after the merge.
_rows_2018 = data_2018[data_2018["wave"] == "2018"]
eductaion_2018 = _rows_2018[['ID', "education"]]
demo = pd.merge(demo, eductaion_2018, on='ID', how='left', suffixes=("_2020", "_2018"))

# Prefer the 2020 value; fall back to 2018 where 2020 is missing.
demo['education'] = demo['education_2020'].fillna(demo['education_2018'])
-
# ---------------------------------------------------------------------------
# 2020 wave: birth year/month (back-filled from 2018), interview date and
# the base column selection for data_2020.
# ---------------------------------------------------------------------------

# Take the 2020 birth date where it is present. The previous code tested
# `pd.isna(...)` without negation, so it only kept values that were already
# missing and the 2020 columns were always NaN; respondents with no 2018
# record therefore ended up without any birth date.
# NOTE(review): if ba003_1 / ba003_2 can hold special "don't know" codes,
# they now flow through -- confirm against the codebook.
for target_col, raw_col in (("birth_year", "ba003_1"), ("birth_month", "ba003_2")):
    demo[target_col] = demo.apply(
        lambda row, col=raw_col: row[col] if not pd.isna(row[col]) else np.nan,
        axis=1,
    )

# Bring in the 2018 values; the name clash yields *_2020 / *_2018 columns.
birth_year_2018 = data_2018[data_2018["wave"] == "2018"][['ID', "birth_year", "birth_month"]]
demo = pd.merge(demo, birth_year_2018, on='ID', how='left', suffixes=("_2020", "_2018"))

# Prefer the 2020 value; fall back to 2018 where 2020 is missing.
demo['birth_year'] = demo['birth_year_2020'].fillna(demo['birth_year_2018'])
demo['birth_month'] = demo['birth_month_2020'].fillna(demo['birth_month_2018'])

# Interview year/month come from the sample-information file.
demo = pd.merge(demo, weight[["ID", "iyear", "imonth"]], on="ID", how="left")
demo["ba003"] = 1
data_2020 = demo[['ID', 'householdID', 'communityID', 'xrgender', 'birth_year', 'birth_month', 'ba003',
                  "iyear", "imonth", 'marital_status', 'education']]
-
-
-
# ---------------------------------------------------------------------------
# 2020 wave: attach location columns from the PSU table and create all-NaN
# columns for the measurement and blood variables listed below.
# ---------------------------------------------------------------------------
_psu_columns = ['communityID', 'province', 'city', 'urban_nbs']
data_2020 = pd.merge(data_2020, psu[_psu_columns], on="communityID", how="left")

_body_measure_columns = ['qi002', 'ql002', 'waist', 'Systolic', 'Diastolic']
_performance_columns = ["Sit_Stand_5x", "Walking_Speed_Time"]
_blood_columns = [
    'bl_wbc', 'bl_mcv', 'bl_plt', 'bl_bun', 'bl_glu', 'bl_crea', 'bl_cho',
    'bl_tg', 'bl_hdl', 'bl_ldl', 'bl_crp', 'bl_hbalc', 'bl_ua', 'bl_hct',
    'bl_hgb', 'bl_cysc',
]

# Filled with NaN in the same group order as before, so the resulting
# column order is unchanged.
for _empty_group in (_body_measure_columns, _performance_columns, _blood_columns):
    data_2020[_empty_group] = np.nan
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# ---------------------------------------------------------------------------
# 2020 wave: physical activity, smoking, drinking, injury, sleep and
# activity-day columns, all written back onto `health_status`.
# ---------------------------------------------------------------------------


def _activity_level_2020(row):
    """Return 2, 1 or 0 from the three da032 columns, or NaN if undecidable.

    2 when da032_1_ is 1; otherwise 1 when da032_2_ is 1; otherwise 0 when
    da032_3_ is 1 or all three columns are 2.
    """
    vigorous, moderate, walking = row["da032_1_"], row["da032_2_"], row["da032_3_"]
    if vigorous == 1:
        return 2
    if moderate == 1:
        return 1
    if walking == 1 or (vigorous == 2 and moderate == 2 and walking == 2):
        return 0
    return np.nan


def _ever_smoked_2020(code):
    """Return 0 for code 2 and 1 for every other value, missing included."""
    # NOTE(review): a missing da046 answer is scored 1, not NaN -- confirm
    # that this is intended.
    return 0 if code == 2 else 1


def _drink_2020(row):
    """Map da051: 1 or 2 -> 1, 3 -> 0, anything else -> NaN."""
    if row["da051"] == 1 or row["da051"] == 2:
        return 1
    if row["da051"] == 3:
        return 0
    return np.nan


def _either_column(first_col, second_col):
    """Make a row function: 1 if either column is 1, else 0 if either is 2, else NaN."""
    def flag(row):
        if row[first_col] == 1 or row[second_col] == 1:
            return 1
        if row[first_col] == 2 or row[second_col] == 2:
            return 0
        return np.nan
    return flag


def _yes_no_flag(code):
    """Map code 1 to 1 and code 2 to 0; any other value becomes NaN."""
    if code == 1:
        return 1
    if code == 2:
        return 0
    return np.nan


def _days_or_zero(flag_col, days_col):
    """Make a row function: NaN if the flag is missing, else the day count (0 if missing)."""
    def pick(row):
        if pd.isna(row[flag_col]):
            return np.nan
        if pd.isna(row[days_col]):
            return 0
        return row[days_col]
    return pick


health_status["Physical_activity"] = health_status.apply(_activity_level_2020, axis=1)
health_status["Smoke"] = health_status["da046"].apply(_ever_smoked_2020)
health_status["Drink"] = health_status.apply(_drink_2020, axis=1)
health_status["Accident_Or_Injury"] = health_status.apply(_either_column("da019", "da020"), axis=1)
health_status["Fell_In_Last2Years"] = health_status.apply(_either_column("da022", "da023"), axis=1)

# No source column is read for Wear_Glasses in this wave; it is all NaN.
health_status["Wear_Glasses"] = np.nan

health_status["Average_Sleep_Hours"] = health_status["da030"]
health_status["Average_Nap_Minutes"] = health_status["da031"]

# Activity flags first, because the day counts below read them.
for flag_name, raw_col in (
    ("Vigorous_Activity_10Min", "da032_1_"),
    ("Moderate_Effort_10Min", "da032_2_"),
    ("Walking_10Min", "da032_3_"),
):
    health_status[flag_name] = health_status[raw_col].apply(_yes_no_flag)

for days_name, flag_name, raw_col in (
    ("Vigorous_Activity_Days", "Vigorous_Activity_10Min", "da033_1_"),
    ("Moderate_Effort_Days", "Moderate_Effort_10Min", "da033_2_"),
    ("Walking_Days", "Walking_10Min", "da033_3_"),
):
    health_status[days_name] = health_status.apply(_days_or_zero(flag_name, raw_col), axis=1)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# ---------------------------------------------------------------------------
# 2020 wave: turn da038_s1..da038_s9 into 0/1 flags in place (1 when the
# column holds its own index number) and derive the internet-use flag.
# ---------------------------------------------------------------------------


def _matches_code(col, code):
    """Make a row function returning 1 when row[col] equals code, else 0."""
    def flag(row):
        return 1 if row[col] == code else 0
    return flag


for option in range(1, 10):
    option_col = f"da038_s{option}"
    health_status[option_col] = health_status.apply(_matches_code(option_col, option), axis=1)

# Anything other than 1, missing included, is scored 0.
health_status["Internet_Usage_LastMonth"] = health_status["da040"].apply(
    lambda code: 1 if code == 1 else 0
)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# ---------------------------------------------------------------------------
# 2020 wave: disease indicators, back-filled from the 2018 wave, then merged
# together with the other health variables into data_2020.
# ---------------------------------------------------------------------------


def _copy_if_present(col):
    """Make a row function returning row[col], or NaN when it is missing."""
    def value(row):
        return np.nan if pd.isna(row[col]) else row[col]
    return value


health_status["Drink_PastYear"] = health_status["da051"]

# da003_12_ is recomputed row by row with process_row (defined elsewhere in
# this file) before it is used as a disease source column.
health_status['da003_12_'] = health_status.apply(process_row, axis=1)

# The source list skips da003_13_: it runs 1..12, then 14 and 15.
columns_to_diseases_old = [f"da003_{k}_" for k in (*range(1, 13), 14, 15)]
columns_to_diseases_new = [
    'Hypertension', 'Dyslipidemia', 'Disabetes_or_High_Blood_Sugar',
    'Cancer_or_Malignant_Tumor', 'Chronic_Lung_Diseases', 'Liver_Disease',
    'Heart_Problems', 'Stroke', 'Kidney_Diease',
    'Stomach_or_Other_Digestive_Disease',
    'Emotional_Nervous_or_Psychiatric_Problems', 'Memory_Related_Disease',
    'Arthritis_or_Rheumatism', 'Asthma',
]
for col_old, col_new in zip(columns_to_diseases_old, columns_to_diseases_new):
    health_status[col_new] = health_status.apply(_copy_if_present(col_old), axis=1)

# Rows of the accumulated frame that belong to the 2018 wave.
_rows_2018 = data_2018[data_2018["wave"] == "2018"]
diseases_2018 = _rows_2018[['ID'] + columns_to_diseases_new]

# Both frames carry the disease names, so the merge suffixes them by wave.
health_status = pd.merge(health_status, diseases_2018, on='ID', how='left', suffixes=("_2020", "_2018"))

# Prefer the 2020 value; fall back to 2018 where 2020 is missing.
for col in columns_to_diseases_new:
    health_status[col] = health_status[f'{col}_2020'].fillna(health_status[f'{col}_2018'])

_select_2020 = (
    ['ID', 'householdID', 'communityID']
    + columns_to_diseases_new
    + [
        "Physical_activity", "Smoke", "Drink", "Accident_Or_Injury",
        "Fell_In_Last2Years", "Wear_Glasses", "Average_Sleep_Hours",
        "Average_Nap_Minutes", "Vigorous_Activity_10Min",
        "Moderate_Effort_10Min", "Walking_10Min", "Vigorous_Activity_Days",
        "Moderate_Effort_Days", "Walking_Days",
    ]
    + [f"da038_s{k}" for k in range(1, 10)]
    + ["Internet_Usage_LastMonth", "Drink_PastYear"]
)
health_status_select = health_status[_select_2020]

data_2020 = pd.merge(data_2020, health_status_select, on=["ID", 'householdID', 'communityID'], how="left")

# These two columns are created as all-NaN for the 2020 wave.
for _empty_col in ('Heart_attack_2_years', 'Recurrent_Stroke'):
    data_2020[[_empty_col]] = np.nan
-
# ---------------------------------------------------------------------------
# 2020 wave: item scores on `health_status`, summed into
# data_2020["Cognition_score"]. Any NaN item makes the whole sum NaN.
# ---------------------------------------------------------------------------


def _yes_no_flag(code):
    """Map code 1 to 1 and code 2 to 0; any other value becomes NaN."""
    if code == 1:
        return 1
    if code == 2:
        return 0
    return np.nan


def _subtraction_scorer_2020(status_col, answer_col, expected):
    """Make a row scorer for one subtraction step.

    Status 997 scores 0; status 1 scores 1 when the answer equals
    `expected` and 0 otherwise; any other status yields NaN.
    """
    def score(row):
        status = row[status_col]
        if status == 997:
            return 0
        if status == 1:
            return 1 if row[answer_col] == expected else 0
        return np.nan
    return score


def _recall_scorer(gate_col, word_col, word_code):
    """Make a row scorer: NaN unless gate_col == 1, else 1/0 for word_col == word_code."""
    def score(row):
        if not row[gate_col] == 1:
            return np.nan
        return 1 if row[word_col] == word_code else 0
    return score


# dc003_score is computed here but is not one of the terms summed below.
for score_col, raw_col in (
    ("dc001s1_score", "dc001"),
    ("dc001s2_score", "dc005"),
    ("dc001s3_score", "dc003"),
    ("dc002_score", "dc004"),
    ("dc003_score", "dc002"),
):
    health_status[score_col] = health_status[raw_col].apply(_yes_no_flag)

# Expected running results 93, 86, 79, 72, 65 feed dc019_score..dc023_score.
for step, expected in enumerate((93, 86, 79, 72, 65), start=1):
    health_status[f"dc0{18 + step}_score"] = health_status.apply(
        _subtraction_scorer_2020(f"dc007_{step}", f"dc007_{step}_1", expected),
        axis=1,
    )

# Two ten-word recall lists, both gated on xwordrecallbr == 1.
for score_prefix, raw_prefix in (("dc006", "dc012"), ("dc027", "dc028")):
    for word in range(1, 11):
        health_status[f"{score_prefix}s{word}_score"] = health_status.apply(
            _recall_scorer("xwordrecallbr", f"{raw_prefix}_s{word}", word),
            axis=1,
        )

health_status["draw_score"] = health_status["dc009"].apply(_yes_no_flag)

_cognition_items = (
    ["dc001s1_score", "dc001s2_score", "dc001s3_score", "dc002_score"]
    + [f"dc0{n}_score" for n in range(19, 24)]
    + [f"dc006s{word}_score" for word in range(1, 11)]
    + [f"dc027s{word}_score" for word in range(1, 11)]
    + ["draw_score"]
)
_cognition_total = health_status[_cognition_items[0]]
for item in _cognition_items[1:]:
    _cognition_total = _cognition_total + health_status[item]

# NOTE(review): this assignment aligns on index labels, not on ID -- verify
# that `health_status` and `data_2020` share row order.
data_2020["Cognition_score"] = _cognition_total
-
# ---------------------------------------------------------------------------
# 2020 wave: psychiatric score (raw items dc016..dc025 stored under the
# names dc009_score..dc018_score), sleep_state and ADL score.
# ---------------------------------------------------------------------------


def _forward_item(value):
    """Return value - 1 for non-missing values below 5, otherwise NaN."""
    if (not pd.isna(value)) and value < 5:
        return value - 1
    return np.nan


def _reversed_item(value):
    """Return 4 - value for non-missing values below 5, otherwise NaN."""
    if (not pd.isna(value)) and value < 5:
        return 4 - value
    return np.nan


def _difficulty_flag(code):
    """Map code 1 to 0 and any code >= 2 to 1; everything else becomes NaN."""
    if code == 1:
        return 0
    if code >= 2:
        return 1
    return np.nan


def _add_up(frame, names):
    """Add the named columns left to right; NaN in any term propagates."""
    total = frame[names[0]]
    for name in names[1:]:
        total = total + frame[name]
    return total


# Score number n reads raw column number n + 7; scores 13 and 16 (raw dc020
# and dc023) are taken in the reverse direction (4 - x).
_psych_scores = []
for number in range(9, 19):
    score_col = f"dc{number:03d}_score"
    raw_col = f"dc{number + 7:03d}"
    scorer = _reversed_item if number in (13, 16) else _forward_item
    health_status[score_col] = health_status[raw_col].apply(scorer)
    _psych_scores.append(score_col)

# NOTE(review): the assignments to data_2020 below align on index labels,
# not on ID -- verify the frames share row order.
data_2020["psychiatric_score"] = _add_up(health_status, _psych_scores)

# Raw dc022 answer, with values above 900 blanked out.
data_2020["sleep_state"] = health_status['dc022'].apply(lambda value: np.nan if value > 900 else value)

# ADL scores keep the db010..db015 names but read the raw columns below.
_adl_scores = [f"db0{n}_score" for n in range(10, 16)]
_adl_sources = ["db001", "db003", "db005", "db007", "db009", "db011"]
for score_col, raw_col in zip(_adl_scores, _adl_sources):
    health_status[score_col] = health_status[raw_col].apply(_difficulty_flag)

data_2020["ADL"] = _add_up(health_status, _adl_scores)
-
-
# ---------------------------------------------------------------------------
# 2020 wave: household energy variables, then close the wave by tagging it,
# running change_columns and stacking it under the earlier waves.
# ---------------------------------------------------------------------------


def _yes_no_flag(code):
    """Map code 1 to 1 and code 2 to 0; any other value becomes NaN."""
    if code == 1:
        return 1
    if code == 2:
        return 0
    return np.nan


def _heating_energy_2020(code):
    """Recode code 8 to NaN and code 7 to 0; pass every other value through."""
    if code == 8:
        return np.nan
    if code == 7:
        return 0
    return code


def _cooking_fuel_2020(code):
    """Recode code 9 to NaN and codes 7 and 8 to 0; pass other values through."""
    if code == 9:
        return np.nan
    if code == 8 or code == 7:
        return 0
    return code


houseing["Gas_Connection"] = houseing["i018"].apply(_yes_no_flag)
houseing["Heating_Facility"] = houseing["i019"].apply(_yes_no_flag)
houseing["Heating_Energy"] = houseing["i020"].apply(_heating_energy_2020)
houseing["Cooking_Fuel"] = houseing["i021"].apply(_cooking_fuel_2020)

_housing_keys = ['householdID', 'communityID']
houseing_select = houseing[_housing_keys + ['Gas_Connection', 'Heating_Facility', 'Heating_Energy', 'Cooking_Fuel']]
data_2020 = pd.merge(data_2020, houseing_select, on=_housing_keys, how="left")

data_2020["wave"] = year
change_columns(data_2020)

# From here on data_2020 holds the earlier waves plus the 2020 rows.
data_2020 = pd.concat([data_2018, data_2020], axis=0)
-
-
-
# ---------------------------------------------------------------------------
# Rewrite selected city / province labels across all stacked waves, then
# write the combined table to CSV.
# ---------------------------------------------------------------------------
_city_renames = (
    ('海东地区', '海东市'),
    ('北京', '北京市'),
    ('哈尔滨', '哈尔滨市'),
    ('天津', '天津市'),
    ('巢湖市', '合肥市'),
    ('襄樊市', '襄阳市'),
)
_province_renames = (
    ('北京', '北京市'),
    ('天津', '天津市'),
    ('广西省', '广西壮族自治区'),
)

# Whole-value replacements, applied one at a time per column.
for _label_col, _renames in (('city', _city_renames), ('province', _province_renames)):
    for _old_label, _new_label in _renames:
        data_2020[_label_col] = data_2020[_label_col].replace(_old_label, _new_label)

data_2020.to_csv("charls_paper_2.csv", index=False)
print(123)
|