12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789 |
- import pandas as pd
- import math
- import numpy as np
- if __name__ == "__main__":
- # age
- # sex
- # marital status
- # education
- # smoking status
- # drinking status
- # physical activity level
- # body mass index (BMI)
- # glycated haemoglobin (HbA1c)
- # systolic blood pressure (SBP)
- # high-density lipoprotein cholesterol (HDL-C)
- # C-reactive protein
# Wave 1992: read the fixed-width health file and collect one row per
# respondent (identifiers, demographics, health behaviours, BMI and
# cardiovascular history).
data = {
    "HHID": [],
    "PN": [],
    "BIRTH_YEAR": [],
    "SEX": [],
    "MARITAL_STATUS": [],
    "EDUCATION": [],
    "SMOKED": [],
    "DRINKED": [],
    "PHYSICAL_ACTIVITY_LEVEL": [],
    "BMI": [],
    "HEART_PROBL": [],
    "STROKE": [],
}
with open("/root/r_base/HRS/1992/data/health.da", "r", encoding="utf-8") as file:
    for line in file:
        # Raw marital codes: 1 married, 2 partner, 3 separated, 4 divorced,
        # 5 widowed, 6 never married, 7/8 married with two residences, 9 NA.
        # Recoded to "1" (married or partnered), "5" (other), NaN (NA).
        marital_code = line[302:303]
        if marital_code in ("1", "2", "7", "8"):
            MARITAL_STATUS = "1"
        elif marital_code == "9":
            MARITAL_STATUS = np.nan
        else:
            MARITAL_STATUS = "5"

        # Activity level: 3 when the vigorous-activity field is 1 or 2,
        # otherwise 2 when the light-activity field is 1 or 2, otherwise 1.
        if line[534:535] in ("1", "2"):
            PHYSICAL_ACTIVITY_LEVEL = 3
        elif line[533:534] in ("1", "2"):
            PHYSICAL_ACTIVITY_LEVEL = 2
        else:
            PHYSICAL_ACTIVITY_LEVEL = 1

        # Weight is converted from pounds to kilograms, height from
        # feet + inches to metres; BMI = kg / m^2.
        WEIGH = float(line[536:539]) * 0.45359237
        HIGHT = float(line[542:543]) * 0.3048 + float(line[543:545]) * 0.0254
        BMI = WEIGH / math.pow(HIGHT, 2)

        record = {
            "HHID": line[0:6],
            "PN": line[6:9],
            "BIRTH_YEAR": line[249:254],
            "SEX": line[109:110],  # 1 = male, 2 = female
            "MARITAL_STATUS": MARITAL_STATUS,
            # Years of education: 0 none, 1-11 grades, 12 high school,
            # 13-15 some college, 16 college grad, 17 post college, 97 other.
            "EDUCATION": line[264:266],
            "SMOKED": line[519:520],  # 1 = yes, 5 = no
            "DRINKED": line[527:528],  # 1 = yes, 5 = no
            "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL,
            "BMI": BMI,
            # Heart attack, coronary heart disease, angina, congestive heart
            # failure or other heart problems: 1 = yes, 5 = no.
            "HEART_PROBL": line[459:460],
            "STROKE": line[473:474],
        }
        for key, value in record.items():
            data[key].append(value)

# The scalar is broadcast to every row by the DataFrame constructor.
data["WAVE"] = 1992
result = pd.DataFrame(data)
# Wave 1993: read BR21.DA, build one row per respondent and append the
# wave to the running `result` table.
data = {
    "HHID": [],
    "PN": [],
    "BIRTH_YEAR": [],
    "SEX": [],
    "MARITAL_STATUS": [],
    "EDUCATION": [],
    "SMOKED": [],
    "DRINKED": [],
    "PHYSICAL_ACTIVITY_LEVEL": [],
    "BMI": [],
    "HEART_PROBL": [],
    "STROKE": [],
}
with open("/root/r_base/HRS/1993/data/BR21.DA", "r", encoding="utf-8") as file:
    for line in file:
        # Raw marital codes: 1 married (spouse present), 2 married (spouse
        # absent), 3 living with someone, 4 divorced/separated, 5 widowed,
        # 6 never married.  Recoded to "1" (married or partnered) / "5".
        if line[98:99] in ("1", "2", "3"):
            MARITAL_STATUS = "1"
        else:
            MARITAL_STATUS = "5"

        # Raw smoking codes: 1 current, 2 former, 3 never.
        # Recoded to "1" (current or former smoker) / "5" (anything else).
        if line[172:173] in ("1", "2"):
            SMOKED = "1"
        else:
            SMOKED = "5"

        # Weight is converted from pounds to kilograms; height is read as
        # a single inches field and converted to metres.
        WEIGH = float(line[179:182]) * 0.45359237
        HEIGHT = float(line[182:184]) * 0.0254
        BMI = WEIGH / math.pow(HEIGHT, 2)

        record = {
            "HHID": line[0:6],
            "PN": line[6:9],
            "BIRTH_YEAR": line[61:65],
            "SEX": line[16:17],  # 1 = male, 2 = female
            "MARITAL_STATUS": MARITAL_STATUS,
            # Years of education: 0 none, 1-11 grades, 12 high school,
            # 13-15 some college, 16 college grad, 17 post college, 97 other.
            "EDUCATION": line[74:76],
            "SMOKED": SMOKED,
            "DRINKED": line[176:177],  # 1 = yes, 5 = no
            # No activity level is derived for this wave.
            "PHYSICAL_ACTIVITY_LEVEL": np.nan,
            "BMI": BMI,
            # Heart problems and stroke: 1 = yes, 5 = no.
            "HEART_PROBL": line[139:140],
            "STROKE": line[142:143],
        }
        for key, value in record.items():
            data[key].append(value)

data["WAVE"] = 1993
result_1993 = pd.DataFrame(data)
result = pd.concat([result, result_1993])
# Wave 1994, file W2a: identifiers, birth year, sex, marital status and
# education, one row per respondent.
data = {
    "HHID": [],
    "PN": [],
    "BIRTH_YEAR": [],
    "SEX": [],
    "MARITAL_STATUS": [],
    "EDUCATION": [],
}
with open("/root/r_base/HRS/1994/data/W2a.da", "r", encoding="utf-8") as file:
    for line in file:
        # Raw marital codes: 1 married, 2 partner, 3 separated, 4 divorced,
        # 5 widowed, 6 never married, 7/8 married (not institutionalized /
        # institutionalized or abroad), 9 NA.
        # The slice is two characters wide while the codes are one digit,
        # so the unstripped text could never equal any of them and every
        # row was recoded to "5".  Strip the padding before comparing.
        # NOTE(review): assumes the field is blank-padded - confirm the
        # column positions against the 1994 codebook.
        marital_code = line[55:57].strip()
        if marital_code in ("1", "2", "7", "8"):
            MARITAL_STATUS = "1"  # married or partnered
        elif marital_code == "9":
            MARITAL_STATUS = np.nan
        else:
            MARITAL_STATUS = "5"  # other

        record = {
            "HHID": line[0:6],
            "PN": line[6:9],
            "BIRTH_YEAR": line[26:30],
            "SEX": line[22:23],  # 1 = male, 2 = female
            "MARITAL_STATUS": MARITAL_STATUS,
            # Years of education: 0 none, 1-11 grades, 12 high school,
            # 13-15 some college, 16 college grad, 17 post college,
            # 97 other, 98 don't know, 99 not ascertained.
            # The slice is three characters wide for codes of at most two
            # digits; padding is stripped so the stored value is the bare
            # code.  NOTE(review): confirm the field width.
            "EDUCATION": line[112:115].strip(),
        }
        for key, value in record.items():
            data[key].append(value)

data["WAVE"] = 1994
result_1994_one = pd.DataFrame(data)
# Wave 1994, file W2B: smoking, drinking, physical activity, BMI, heart
# problems and stroke; merged onto the W2a demographics and appended to
# the running `result` table.
#
# NOTE(review): every slice read from this file is one character wider
# than the code literals it used to be compared with ("02", "994", "8",
# ...), so none of those comparisons could succeed.  Fields are now
# stripped / parsed numerically before comparison; this assumes the
# fields are blank-padded - confirm the column layout against the
# 1994 codebook.


def _to_float(text):
    """Return *text* parsed as a float, or NaN when it is blank or not numeric."""
    try:
        return float(text)
    except ValueError:
        return np.nan


def _activity_flag(count_text, unit_text):
    """Classify one activity-frequency answer.

    Returns True when the reported count exceeds the threshold for its
    unit (per week > 0, per month > 3, per year > 51), False when the
    reported count is zero, and NaN in every other case (blank or
    non-numeric count, special codes 994-999, any other unit, or a
    count at or below the threshold).
    """
    count = _to_float(count_text)
    # 994 and 995-999 are special codes, not real frequencies.
    if np.isnan(count) or count >= 994:
        return np.nan
    if count == 0:
        return False
    try:
        unit = int(unit_text)
    except ValueError:
        return np.nan
    # Unit codes: 2 = week, 4 = month, 6 = year.  Other units
    # (7 other, 11 day, 98/99 missing, 0 inap) are left unclassified.
    threshold = {2: 0, 4: 3, 6: 51}.get(unit)
    if threshold is not None and count > threshold:
        return True
    return np.nan


def _yes_no_code(text):
    """Return the stripped code, or NaN for blank, 0 (inap), 8 (DK) and 9 (RF)."""
    code = text.strip()
    return np.nan if code in ("", "0", "8", "9") else code


data = {
    "HHID": [],
    "PN": [],
    "SMOKED": [],
    "DRINKED": [],
    "PHYSICAL_ACTIVITY_LEVEL": [],
    "BMI": [],
    "HEART_PROBL": [],
    "STROKE": [],
}
with open("/root/r_base/HRS/1994/data/W2B.DA", "r", encoding="utf-8") as file:
    for line in file:
        VIGOROUS_PHYSICAL_FLAG = _activity_flag(line[378:382], line[382:385])
        LIGHT_PHYSICAL_FLAG = _activity_flag(line[371:375], line[375:378])

        # 3 = vigorous, 2 = moderate/light, 1 = inactive (at least one
        # activity explicitly reported as zero), NaN = cannot be determined.
        if VIGOROUS_PHYSICAL_FLAG is True:
            PHYSICAL_ACTIVITY_LEVEL = 3
        elif LIGHT_PHYSICAL_FLAG is True:
            PHYSICAL_ACTIVITY_LEVEL = 2
        elif LIGHT_PHYSICAL_FLAG is False or VIGOROUS_PHYSICAL_FLAG is False:
            PHYSICAL_ACTIVITY_LEVEL = 1
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan

        # Weight in pounds; values above 500 are treated as missing.
        pounds = _to_float(line[385:389])
        WEIGH = pounds * 0.45359237 if not pounds > 500 else np.nan

        # Height in feet + inches; values above 95 are treated as missing.
        feet = _to_float(line[389:392])
        inches = _to_float(line[392:395])
        if not feet > 95 and not inches > 95:
            HIGHT = feet * 0.3048 + inches * 0.0254
        else:
            HIGHT = np.nan

        # BMI = kg / m^2; a zero height would divide by zero, so it is
        # treated as missing as well.
        if not np.isnan(WEIGH) and not np.isnan(HIGHT) and HIGHT > 0:
            BMI = WEIGH / math.pow(HIGHT, 2)
        else:
            BMI = np.nan

        record = {
            "HHID": line[0:6],
            "PN": line[6:9],
            # Codes: 1 yes, 5 no, 8 don't know, 9 refused, 0 inap.
            "SMOKED": line[356:358].strip(),
            "DRINKED": line[367:369].strip(),
            "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL,
            "BMI": BMI,
            # Heart problems and stroke: 1 = yes, 5 = no.
            "HEART_PROBL": _yes_no_code(line[147:149]),
            "STROKE": _yes_no_code(line[173:175]),
        }
        for key, value in record.items():
            data[key].append(value)

result_1994_two = pd.DataFrame(data)
result_1994 = pd.merge(result_1994_one, result_1994_two, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_1994], axis=0)
print(result.head())
# Wave 1995, file A95A_R: identifiers, birth year, marital status and
# education, one row per respondent.
data = {
    "HHID": [],
    "PN": [],
    "BIRTH_YEAR": [],
    "MARITAL_STATUS": [],
    "EDUCATION": [],
}
with open("/root/r_base/HRS/1995/data/A95A_R.DA", "r", encoding="utf-8") as file:
    for line in file:
        # Raw marital codes: 1 married (spouse present), 2 married (spouse
        # absent), 3 living with someone, 4 divorced/separated, 5 widowed,
        # 6 never married, 7 married with spouse absent (not institution),
        # 0 exit proxy taken before the surviving-spouse interview.
        # Recoded to "1" (married or partnered), NaN for 0 and 7, "5" otherwise.
        marital_code = line[76:77]
        if marital_code in ("1", "2", "3"):
            MARITAL_STATUS = "1"
        elif marital_code in ("0", "7"):
            MARITAL_STATUS = np.nan
        else:
            MARITAL_STATUS = "5"

        # Years of education (0-17); 97 other, 98 DK/NA and 99 refused are
        # stored as missing.
        education_code = line[49:51]
        if education_code in ("97", "98", "99"):
            EDUCATION = np.nan
        else:
            EDUCATION = education_code

        record = {
            "HHID": line[0:6],
            "PN": line[6:9],
            "BIRTH_YEAR": line[30:34],
            "MARITAL_STATUS": MARITAL_STATUS,
            "EDUCATION": EDUCATION,
        }
        for key, value in record.items():
            data[key].append(value)

data["WAVE"] = 1995
result_1995_one = pd.DataFrame(data)
# Wave 1995, file A95CS_R: sex of each respondent, left-joined onto the
# demographic rows by (HHID, PN).
data = {"HHID": [], "PN": [], "SEX": []}
with open("/root/r_base/HRS/1995/data/A95CS_R.DA", "r", encoding="utf-8") as file:
    for line in file:
        sex_code = line[36:37]  # 1 = male, 2 = female
        data["HHID"].append(line[0:6])
        data["PN"].append(line[6:9])
        # Code 0 is stored as missing.
        data["SEX"].append(np.nan if sex_code == "0" else sex_code)

result_1995_two = pd.DataFrame(data)
result_1995 = result_1995_one.merge(result_1995_two, how="left", on=["HHID", "PN"])
# Wave 1995, file A95B_R: smoking, drinking, BMI, heart problems and
# stroke; merged onto the 1995 demographics and appended to `result`.
data = {
    "HHID": [],
    "PN": [],
    "SMOKED": [],
    "DRINKED": [],
    "PHYSICAL_ACTIVITY_LEVEL": [],
    "BMI": [],
    "HEART_PROBL": [],
    "STROKE": [],
}
with open("/root/r_base/HRS/1995/data/A95B_R.DA", "r", encoding="utf-8") as file:
    for line in file:
        # Raw smoking codes: 1 yes, 2 cigars, 5 no, 7 other, 8 DK/NA,
        # 9 refused.  Recoded to "1" (yes), "5" (no), NaN otherwise.
        smoke_code = line[153:154]
        if smoke_code in ("1", "2"):
            SMOKED = "1"
        elif smoke_code == "5":
            SMOKED = "5"
        else:
            SMOKED = np.nan

        # Raw drinking codes: 1 yes, 3 never used alcohol, 5 no, 7 other,
        # 8 DK/NA, 9 refused.  Recoded to "1" (yes), "5" (no), NaN otherwise.
        drink_code = line[157:158]
        if drink_code == "1":
            DRINKED = "1"
        elif drink_code in ("3", "5"):
            DRINKED = "5"
        else:
            DRINKED = np.nan

        # Weight in pounds -> kilograms.  Blank fields and values above
        # 400 are treated as missing (a blank used to crash float()).
        weight_text = line[164:167]
        if weight_text.strip() == "" or float(weight_text) > 400:
            WEIGH = np.nan
        else:
            WEIGH = float(weight_text) * 0.45359237

        # Height in feet + inches -> metres.  Blank, 8 and 9 in the feet
        # column are treated as missing.
        feet_text = line[168:169]
        if feet_text.strip() == "" or feet_text in ("8", "9") or pd.isna(WEIGH):
            BMI = np.nan
        else:
            HIGHT = float(feet_text) * 0.3048 + float(line[169:171]) * 0.0254
            # Use the height computed for THIS record.  The previous code
            # divided by `HEIGHT`, a stale variable left over from the 1993
            # loop, so every BMI in this wave used the wrong height.
            BMI = WEIGH / math.pow(HIGHT, 2)

        # Heart problems: 1 yes, 3 disputes earlier record, 5 no, 7 other,
        # 8 DK/NA, 9 refused.  Only code 8 is mapped to missing here.
        heart_code = line[63:64]
        HEART_PROBL = np.nan if heart_code == "8" else heart_code

        # Stroke: code 8 -> missing, codes 2 and 5 -> "5" (no), anything
        # else is kept as read.
        stroke_code = line[84:85]
        if stroke_code == "8":
            STROKE = np.nan
        elif stroke_code in ("2", "5"):
            STROKE = "5"
        else:
            STROKE = stroke_code

        record = {
            "HHID": line[0:6],
            "PN": line[6:9],
            "SMOKED": SMOKED,
            "DRINKED": DRINKED,
            # No activity level is derived for this wave.
            "PHYSICAL_ACTIVITY_LEVEL": np.nan,
            "BMI": BMI,
            "HEART_PROBL": HEART_PROBL,
            "STROKE": STROKE,
        }
        for key, value in record.items():
            data[key].append(value)

result_1995_three = pd.DataFrame(data)
result_1995 = pd.merge(result_1995, result_1995_three, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_1995], axis=0)
# Wave 1996, file H96A_R: identifiers, birth year, marital status and
# education, one row per respondent.
data = {
    "HHID": [],
    "PN": [],
    "BIRTH_YEAR": [],
    "MARITAL_STATUS": [],
    "EDUCATION": [],
}
with open("/root/r_base/HRS/1996/data/H96A_R.DA", "r", encoding="utf-8") as file:
    for line in file:
        # Raw marital codes: 1 married (spouse present), 2 married (spouse
        # absent), 3 living with someone, 4 divorced/separated, 5 widowed,
        # 6 never married, 7 married with spouse absent (not institution).
        # Recoded to "1" for 1-3, "5" for 4-7, NaN for anything else.
        marital_code = line[70:71]
        if marital_code in ("1", "2", "3"):
            MARITAL_STATUS = "1"
        elif marital_code in ("4", "5", "6", "7"):
            MARITAL_STATUS = "5"
        else:
            MARITAL_STATUS = np.nan

        # Years of education (0-17); 97 other, 98 DK/NA and 99 refused are
        # stored as missing.
        education_code = line[43:45]
        if education_code in ("97", "98", "99"):
            EDUCATION = np.nan
        else:
            EDUCATION = education_code

        record = {
            "HHID": line[0:6],
            "PN": line[6:9],
            "BIRTH_YEAR": line[25:29],
            "MARITAL_STATUS": MARITAL_STATUS,
            "EDUCATION": EDUCATION,
        }
        for key, value in record.items():
            data[key].append(value)

data["WAVE"] = 1996
result_1996_one = pd.DataFrame(data)
# Wave 1996, file H96CS_R: sex of each respondent, left-joined onto the
# demographic rows by (HHID, PN).
data = {"HHID": [], "PN": [], "SEX": []}
with open("/root/r_base/HRS/1996/data/H96CS_R.DA", "r", encoding="utf-8") as file:
    for line in file:
        data["HHID"].append(line[0:6])
        data["PN"].append(line[6:9])
        data["SEX"].append(line[74:75])  # 1 = male, 2 = female

result_1996_two = pd.DataFrame(data)
result_1996 = result_1996_one.merge(result_1996_two, how="left", on=["HHID", "PN"])
# Wave 1996, file H96B_R: smoking, drinking, BMI, heart problems and
# stroke; merged onto the 1996 demographics and appended to `result`.
data = {
    "HHID": [],
    "PN": [],
    "SMOKED": [],
    "DRINKED": [],
    "PHYSICAL_ACTIVITY_LEVEL": [],
    "BMI": [],
    "HEART_PROBL": [],
    "STROKE": [],
}
with open("/root/r_base/HRS/1996/data/H96B_R.DA", "r", encoding="utf-8") as file:
    for line in file:
        # Raw smoking codes: 1 yes, 2 cigars, 3 pipe, 5 no, 8 DK/NA,
        # 9 refused.  Recoded to "1" (yes), "5" (no), NaN otherwise.
        smoke_code = line[160:161]
        if smoke_code in ("1", "2", "3"):
            SMOKED = "1"
        elif smoke_code == "5":
            SMOKED = "5"
        else:
            SMOKED = np.nan

        # Raw drinking codes: 1 yes, 3 never used alcohol, 5 no, 8 DK/NA,
        # 9 refused.  Recoded to "1" (yes), "5" (no), NaN otherwise.
        drink_code = line[166:167]
        if drink_code == "1":
            DRINKED = "1"
        elif drink_code in ("3", "5"):
            DRINKED = "5"
        else:
            DRINKED = np.nan

        # Weight in pounds -> kilograms; blank fields and values above
        # 400 are treated as missing.
        weight_text = line[174:177]
        if weight_text.strip() == "" or float(weight_text) > 400:
            WEIGH = np.nan
        else:
            WEIGH = float(weight_text) * 0.45359237

        # Height in feet + inches -> metres; blank, 98 and 99 in the feet
        # field are treated as missing.
        feet_text = line[178:180].strip()
        if feet_text in ("", "98", "99") or pd.isna(WEIGH):
            BMI = np.nan
        else:
            HIGHT = float(feet_text) * 0.3048 + float(line[180:182]) * 0.0254
            # Use the height computed for THIS record.  The previous code
            # divided by `HEIGHT`, a stale variable left over from the 1993
            # loop, so every BMI in this wave used the wrong height.
            BMI = WEIGH / math.pow(HIGHT, 2)

        # Heart problems: 1 yes, 3 disputes earlier record, 5 no, 7 other,
        # 8 DK/NA, 9 refused.  Code 9 and blank are mapped to missing.  The
        # earlier test `== ""` could not match a blank column (a
        # one-character slice is " ", not ""), so blanks are now detected
        # after stripping.
        heart_code = line[66:67]
        if heart_code == "9" or heart_code.strip() == "":
            HEART_PROBL = np.nan
        else:
            HEART_PROBL = heart_code

        # Stroke: code 8 -> missing, codes 2 and 5 -> "5" (no), anything
        # else is kept as read.
        stroke_code = line[87:88]
        if stroke_code == "8":
            STROKE = np.nan
        elif stroke_code in ("2", "5"):
            STROKE = "5"
        else:
            STROKE = stroke_code

        record = {
            "HHID": line[0:6],
            "PN": line[6:9],
            "SMOKED": SMOKED,
            "DRINKED": DRINKED,
            # No activity level is derived for this wave.
            "PHYSICAL_ACTIVITY_LEVEL": np.nan,
            "BMI": BMI,
            "HEART_PROBL": HEART_PROBL,
            "STROKE": STROKE,
        }
        for key, value in record.items():
            data[key].append(value)

result_1996_three = pd.DataFrame(data)
result_1996 = pd.merge(result_1996, result_1996_three, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_1996], axis=0)
# Wave 1998, file H98A_R: identifiers, birth year, marital status and
# education, one row per respondent.
data = {
    "HHID": [],
    "PN": [],
    "BIRTH_YEAR": [],
    "MARITAL_STATUS": [],
    "EDUCATION": [],
}
with open("/root/r_base/HRS/1998/data/H98A_R.DA", "r", encoding="utf-8") as file:
    for line in file:
        # Birth year 9998 is stored as missing.
        birth_year_text = line[31:35]
        if birth_year_text == "9998":
            BIRTH_YEAR = np.nan
        else:
            BIRTH_YEAR = birth_year_text

        # Raw marital codes: 0 DK/NA/refused, 1 married, 3 separated,
        # 4 divorced, 5 widowed, 6 never married, 7 other.
        # Recoded to "1" for 1, "5" for 3-7, NaN for anything else.
        marital_code = line[150:151]
        if marital_code == "1":
            MARITAL_STATUS = "1"
        elif marital_code in ("3", "4", "5", "6", "7"):
            MARITAL_STATUS = "5"
        else:
            MARITAL_STATUS = np.nan

        # Years of education (0-17); 97 other, 98 DK/NA, 99 refused.
        # NOTE(review): this slice is a single character, so it can never
        # equal "97"/"98"/"99" and cannot hold a two-digit year count; the
        # other waves read two characters.  Looks like the end column is
        # off by one - confirm against the 1998 codebook before changing.
        education_code = line[61:62]
        if education_code in ("97", "98", "99"):
            EDUCATION = np.nan
        else:
            EDUCATION = education_code

        record = {
            "HHID": line[0:6],
            "PN": line[6:9],
            "BIRTH_YEAR": BIRTH_YEAR,
            "MARITAL_STATUS": MARITAL_STATUS,
            "EDUCATION": EDUCATION,
        }
        for key, value in record.items():
            data[key].append(value)

data["WAVE"] = 1998
result_1998_one = pd.DataFrame(data)
# 1998 cover-screen file (H98CS_R.DA): respondent sex.
sex_1998 = {"HHID": [], "PN": [], "SEX": []}
with open("/root/r_base/HRS/1998/data/H98CS_R.DA", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for record in handle:
        sex_1998["HHID"].append(record[0:6])
        sex_1998["PN"].append(record[6:9])
        # 1. Male
        # 2. Female
        sex_1998["SEX"].append(record[70:71])

result_1998_two = pd.DataFrame(sex_1998)
# Left join keeps every respondent from the section A frame.
result_1998 = result_1998_one.merge(result_1998_two, on=["HHID", "PN"], how="left")
# 1998 section B (H98B_R.DA): smoking, drinking, BMI, heart problems and
# stroke. The result is merged into result_1998 and appended to result.
health_1998 = {
    "HHID": [],
    "PN": [],
    "SMOKED": [],
    "DRINKED": [],
    "PHYSICAL_ACTIVITY_LEVEL": [],
    "BMI": [],
    "HEART_PROBL": [],
    "STROKE": [],
}
with open("/root/r_base/HRS/1998/data/H98B_R.DA", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for line in handle:
        health_1998["HHID"].append(line[0:6])
        health_1998["PN"].append(line[6:9])

        # SMOKED: 1. YES / 5. NO; anything else is missing.
        smoked = line[157:158]
        health_1998["SMOKED"].append(smoked if smoked in ("1", "5") else np.nan)

        # DRINKED raw codes:
        #   1. YES
        #   3. [VOL] NEVER HAVE USED ALCOHOL
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # After recoding: 1 = yes; 5 = no (codes 3 and 5); the rest missing.
        drink_raw = line[184:185]
        if drink_raw == "1":
            drinked = "1"
        elif drink_raw in ("3", "5"):
            drinked = "5"
        else:
            drinked = np.nan
        health_1998["DRINKED"].append(drinked)

        # Physical activity level (3 vigorous / 2 moderate / 1 inactive) is
        # not derived from this file; it is stored as missing.
        health_1998["PHYSICAL_ACTIVITY_LEVEL"].append(np.nan)

        # Weight: pounds converted to kilograms. Values above 400 are treated
        # as missing. A blank field is also missing (it used to crash float()).
        weight_raw = line[196:199].strip()
        if weight_raw and float(weight_raw) <= 400:
            weight_kg = float(weight_raw) * 0.45359237
        else:
            weight_kg = np.nan

        # Height: feet (two columns) plus inches, converted to metres.
        # Feet 98/99 or blank means no BMI. Inches 98/99 or blank falls back
        # to the feet-only height, as the original did for "98".
        feet_raw = line[200:202].strip()
        inches_raw = line[202:204].strip()
        bmi = np.nan
        if feet_raw not in ("", "98", "99") and not pd.isna(weight_kg):
            height_m = float(feet_raw) * 0.3048
            if inches_raw not in ("", "98", "99"):
                height_m += float(inches_raw) * 0.0254
            # Guard against a zero height, which would divide by zero.
            if height_m > 0:
                bmi = weight_kg / math.pow(height_m, 2)
        health_1998["BMI"].append(bmi)

        # Heart problems (heart attack, coronary heart disease, angina,
        # congestive heart failure, or other heart problems). Raw codes:
        #   1. YES
        #   3. [VOL] DISPUTES PREVIOUS WAVE RECORD
        #   5. NO
        #   6. PRELOAD ERROR: Condition reported at prior wave but said no to
        #      new event
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # DK, RF and blank are missing; other codes are kept as read.
        # Previously only "8" was mapped to missing.
        heart_raw = line[66:67]
        health_1998["HEART_PROBL"].append(
            np.nan if heart_raw.strip() in ("", "8", "9") else heart_raw
        )

        # Stroke raw codes:
        #   1. YES
        #   2. [VOL] POSSIBLE STROKE OR TIA (TRANSIENT ISCHEMIC ATTACK)
        #   3. [VOL] DISPUTES PREVIOUS WAVE RECORD
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # Codes 2 and 5 become "5" (no); DK, RF and blank are missing; other
        # codes are kept as read.
        stroke_raw = line[87:88]
        if stroke_raw.strip() in ("", "8", "9"):
            stroke = np.nan
        elif stroke_raw in ("2", "5"):
            stroke = "5"
        else:
            stroke = stroke_raw
        health_1998["STROKE"].append(stroke)

result_1998_three = pd.DataFrame(health_1998)
result_1998 = pd.merge(result_1998, result_1998_three, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_1998], axis=0)
# Fetch the 2000 data.
# Section A (H00A_R.DA): birth year, marital status and education.
demo_2000 = {
    "HHID": [],
    "PN": [],
    "BIRTH_YEAR": [],
    "MARITAL_STATUS": [],
    "EDUCATION": [],
}
with open("/root/r_base/HRS/2000/data/H00A_R.DA", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for record in handle:
        demo_2000["HHID"].append(record[0:6])
        demo_2000["PN"].append(record[6:9])
        demo_2000["BIRTH_YEAR"].append(record[31:35])

        # Raw marital status codes:
        #   0. DK (don't know); NA (not ascertained); RF (refused)
        #   1. MARRIED
        #   3. SEPARATED
        #   4. DIVORCED
        #   5. WIDOWED
        #   6. NEVER MARRIED
        #   7. OTHER (SPECIFY)
        # Recoded to: 1 = married or partner; 5 = other; anything else missing.
        marital_raw = record[152:153]
        if marital_raw == "1":
            marital_status = "1"
        elif marital_raw in ("3", "4", "5", "6", "7"):
            marital_status = "5"
        else:
            marital_status = np.nan
        demo_2000["MARITAL_STATUS"].append(marital_status)

        # Education codes:
        #   0 ........ No formal education
        #   1-11 ..... Grades
        #   12 ....... High school
        #   13-15 .... Some college
        #   16 ....... College grad
        #   17 ....... Post college (17+ years)
        #   97 ....... Other
        #   98 ....... DK (don't know); NA (not ascertained)
        #   99 ....... RF (refused)
        schooling = record[63:65]
        demo_2000["EDUCATION"].append(
            np.nan if schooling in ("97", "98", "99") else schooling
        )

# Added last so WAVE stays the final column; the scalar is broadcast to all rows.
demo_2000["WAVE"] = 2000
result_2000_one = pd.DataFrame(demo_2000)
# 2000 cover-screen file (H00CS_R.DA): respondent sex.
sex_2000 = {"HHID": [], "PN": [], "SEX": []}
with open("/root/r_base/HRS/2000/data/H00CS_R.DA", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for record in handle:
        sex_2000["HHID"].append(record[0:6])
        sex_2000["PN"].append(record[6:9])
        # 1. Male
        # 2. Female
        sex_2000["SEX"].append(record[79:80])

result_2000_two = pd.DataFrame(sex_2000)
# Left join keeps every respondent from the section A frame.
result_2000 = result_2000_one.merge(result_2000_two, on=["HHID", "PN"], how="left")
# 2000 section B (H00B_R.DA): smoking, drinking, BMI, heart problems and
# stroke. The result is merged into result_2000 and appended to result.
health_2000 = {
    "HHID": [],
    "PN": [],
    "SMOKED": [],
    "DRINKED": [],
    "PHYSICAL_ACTIVITY_LEVEL": [],
    "BMI": [],
    "HEART_PROBL": [],
    "STROKE": [],
}
with open("/root/r_base/HRS/2000/data/H00B_R.DA", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for line in handle:
        health_2000["HHID"].append(line[0:6])
        health_2000["PN"].append(line[6:9])

        # SMOKED: 1. YES / 5. NO; anything else is missing.
        smoked = line[154:155]
        health_2000["SMOKED"].append(smoked if smoked in ("1", "5") else np.nan)

        # DRINKED raw codes:
        #   1. YES
        #   3. [VOL] NEVER HAVE USED ALCOHOL
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # After recoding: 1 = yes; 5 = no (codes 3 and 5); the rest missing.
        drink_raw = line[181:182]
        if drink_raw == "1":
            drinked = "1"
        elif drink_raw in ("3", "5"):
            drinked = "5"
        else:
            drinked = np.nan
        health_2000["DRINKED"].append(drinked)

        # Physical activity level (3 vigorous / 2 moderate / 1 inactive) is
        # not derived from this file; it is stored as missing.
        health_2000["PHYSICAL_ACTIVITY_LEVEL"].append(np.nan)

        # Weight: pounds converted to kilograms. Values above 400 are treated
        # as missing. A blank field is also missing (it used to crash float()).
        weight_raw = line[193:196].strip()
        if weight_raw and float(weight_raw) <= 400:
            weight_kg = float(weight_raw) * 0.45359237
        else:
            weight_kg = np.nan

        # Height: feet (one column) plus inches (two columns), in metres.
        # Feet 8/9 or blank means no BMI. Inches 98/99 or blank falls back to
        # the feet-only height; previously such values were added as real
        # inches or crashed float().
        feet_raw = line[197:198].strip()
        inches_raw = line[198:200].strip()
        bmi = np.nan
        if feet_raw not in ("", "8", "9") and not pd.isna(weight_kg):
            height_m = float(feet_raw) * 0.3048
            if inches_raw not in ("", "98", "99"):
                height_m += float(inches_raw) * 0.0254
            # Guard against a zero height, which would divide by zero.
            if height_m > 0:
                bmi = weight_kg / math.pow(height_m, 2)
        health_2000["BMI"].append(bmi)

        # Heart problems (heart attack, coronary heart disease, angina,
        # congestive heart failure, or other heart problems). Raw codes:
        #   1. YES
        #   3. DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
        #   4. DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # Recoded to 1 (yes: 1, 3) / 5 (no: 4, 5). Every other value,
        # including a blank field, is missing. The old catch-all "else"
        # turned blanks into "5".
        heart_raw = line[65:66]
        if heart_raw in ("1", "3"):
            heart_probl = "1"
        elif heart_raw in ("4", "5"):
            heart_probl = "5"
        else:
            heart_probl = np.nan
        health_2000["HEART_PROBL"].append(heart_probl)

        # Stroke raw codes:
        #   1. YES
        #   2. [VOL] POSSIBLE STROKE OR TIA (TRANSIENT ISCHEMIC ATTACK)
        #   3. DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
        #   4. DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # Recoded to 1 (yes: 1, 3) / 5 (no: 2, 4, 5). Every other value,
        # including a blank field, is missing. The old catch-all "else"
        # turned blanks into "1".
        stroke_raw = line[86:87]
        if stroke_raw in ("1", "3"):
            stroke = "1"
        elif stroke_raw in ("2", "4", "5"):
            stroke = "5"
        else:
            stroke = np.nan
        health_2000["STROKE"].append(stroke)

result_2000_three = pd.DataFrame(health_2000)
result_2000 = pd.merge(result_2000, result_2000_three, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2000], axis=0)
# Fetch the 2002 data.
# Preload file (H02PR_R.da): birth year and sex.
preload_2002 = {"HHID": [], "PN": [], "BIRTH_YEAR": [], "SEX": []}
with open("/root/r_base/HRS/2002/data/H02PR_R.da", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for record in handle:
        preload_2002["HHID"].append(record[0:6])
        preload_2002["PN"].append(record[6:9])
        preload_2002["BIRTH_YEAR"].append(record[36:40])
        # 1. Male
        # 2. Female
        preload_2002["SEX"].append(record[19:20])

# Added last so WAVE stays the final column; the scalar is broadcast to all rows.
preload_2002["WAVE"] = 2002
result_2002_one = pd.DataFrame(preload_2002)
# 2002 section C (H02C_R.da): smoking, drinking, BMI, heart problems and
# stroke. The result is left-joined onto the preload frame as result_2002.
health_2002 = {
    "HHID": [],
    "PN": [],
    "SMOKED": [],
    "DRINKED": [],
    "BMI": [],
    "HEART_PROBL": [],
    "STROKE": [],
}
with open("/root/r_base/HRS/2002/data/H02C_R.da", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for line in handle:
        health_2002["HHID"].append(line[0:6])
        health_2002["PN"].append(line[6:9])

        # SMOKED: 1. YES / 5. NO; anything else is missing.
        smoked = line[155:156]
        health_2002["SMOKED"].append(smoked if smoked in ("1", "5") else np.nan)

        # DRINKED raw codes:
        #   1. YES
        #   3. [VOL] NEVER HAVE USED ALCOHOL
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # After recoding: 1 = yes; 5 = no (codes 3 and 5); the rest missing.
        drink_raw = line[179:180]
        if drink_raw == "1":
            drinked = "1"
        elif drink_raw in ("3", "5"):
            drinked = "5"
        else:
            drinked = np.nan
        health_2002["DRINKED"].append(drinked)

        # Weight: pounds converted to kilograms. Values above 400 are treated
        # as missing. A blank field is also missing (it used to crash float()).
        weight_raw = line[190:193].strip()
        if weight_raw and float(weight_raw) <= 400:
            weight_kg = float(weight_raw) * 0.45359237
        else:
            weight_kg = np.nan

        # Height: feet (one column) plus inches (two columns), in metres.
        # Feet 8/9 or blank means no BMI. Inches 98/99 or blank falls back to
        # the feet-only height. The inches slice is stripped first, because
        # the old `== ""` test never matched a space-filled field.
        feet_raw = line[194:195].strip()
        inches_raw = line[195:197].strip()
        bmi = np.nan
        if feet_raw not in ("", "8", "9") and not pd.isna(weight_kg):
            height_m = float(feet_raw) * 0.3048
            if inches_raw not in ("", "98", "99"):
                height_m += float(inches_raw) * 0.0254
            # Guard against a zero height, which would divide by zero.
            if height_m > 0:
                bmi = weight_kg / math.pow(height_m, 2)
        health_2002["BMI"].append(bmi)

        # Heart problems (heart attack, coronary heart disease, angina,
        # congestive heart failure, or other heart problems). Raw codes:
        #   1. YES
        #   3. DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
        #   4. DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # Recoded to 1 (yes: 1, 3) / 5 (no: 4, 5). Every other value,
        # including a blank field, is missing. The old catch-all "else"
        # turned blanks into "5".
        heart_raw = line[59:60]
        if heart_raw in ("1", "3"):
            heart_probl = "1"
        elif heart_raw in ("4", "5"):
            heart_probl = "5"
        else:
            heart_probl = np.nan
        health_2002["HEART_PROBL"].append(heart_probl)

        # Stroke raw codes:
        #   1. YES
        #   2. [VOL] POSSIBLE STROKE OR TIA (TRANSIENT ISCHEMIC ATTACK)
        #   3. DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
        #   4. DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # Recoded to 1 (yes: 1, 3) / 5 (no: 2, 4, 5). Every other value,
        # including a blank field, is missing. The old catch-all "else"
        # turned blanks into "1".
        stroke_raw = line[80:81]
        if stroke_raw in ("1", "3"):
            stroke = "1"
        elif stroke_raw in ("2", "4", "5"):
            stroke = "5"
        else:
            stroke = np.nan
        health_2002["STROKE"].append(stroke)

result_2002_two = pd.DataFrame(health_2002)
result_2002 = pd.merge(result_2002_one, result_2002_two, on=["HHID", "PN"], how="left")
# 2002 section V (H02V_R.da): physical activity level.
activity_2002 = {"HHID": [], "PN": [], "PHYSICAL_ACTIVITY_LEVEL": []}
with open("/root/r_base/HRS/2002/data/H02V_R.da", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for record in handle:
        activity_2002["HHID"].append(record[0:6])
        activity_2002["PN"].append(record[6:9])

        # Raw frequency codes (both activity questions):
        #   1. MORE THAN ONCE A WEEK
        #   2. ONCE A WEEK
        #   3. ONCE TO THREE TIMES A MONTH
        #   4. HARDLY EVER OR NEVER
        #   8. DK (Don't Know)
        #   9. RF (Refused)
        # Target levels:
        #   3. vigorous (vigorous activity more than once a week)
        #   2. moderate (moderate activity more than once a week)
        #   1. inactive (the rest)

        # Vigorous activity: codes 1/2 -> "3", codes 3/4 -> "1", else missing.
        vigorous_raw = record[151:152]
        if vigorous_raw in ("1", "2"):
            vigorous = "3"
        elif vigorous_raw in ("3", "4"):
            vigorous = "1"
        else:
            vigorous = np.nan

        # Moderate activity: codes 1/2 -> "2", codes 3/4 -> "1", else missing.
        moderate_raw = record[152:153]
        if moderate_raw in ("1", "2"):
            moderate = "2"
        elif moderate_raw in ("3", "4"):
            moderate = "1"
        else:
            moderate = np.nan

        # Vigorous wins over moderate; inactive needs at least one valid answer.
        if vigorous == "3":
            level = "3"
        elif moderate == "2":
            level = "2"
        elif vigorous == "1" or moderate == "1":
            level = "1"
        else:
            level = np.nan
        activity_2002["PHYSICAL_ACTIVITY_LEVEL"].append(level)

result_2002_three = pd.DataFrame(activity_2002)
result_2002 = result_2002.merge(result_2002_three, on=["HHID", "PN"], how="left")
# 2002 section B (H02B_R.da): marital status and education. The result is
# merged into result_2002, which is then appended to result.
social_2002 = {"HHID": [], "PN": [], "MARITAL_STATUS": [], "EDUCATION": []}
with open("/root/r_base/HRS/2002/data/H02B_R.da", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for record in handle:
        social_2002["HHID"].append(record[0:6])
        social_2002["PN"].append(record[6:9])

        # Raw marital status codes:
        #   1. Married
        #   3. Separated
        #   4. Divorced
        #   5. Widowed
        #   6. Never Married
        #   7. Other (Specify)
        #   8. DK (Don't Know)
        #   9. RF (Refused)
        # Recoded to: 1 = married or partner; 5 = other; anything else missing.
        marital_raw = record[131:132]
        if marital_raw == "1":
            marital_status = "1"
        elif marital_raw in ("3", "4", "5", "6", "7"):
            marital_status = "5"
        else:
            marital_status = np.nan
        social_2002["MARITAL_STATUS"].append(marital_status)

        # Education codes:
        #   0 ........ No formal education
        #   1-11 ..... Grades
        #   12 ....... High school
        #   13-15 .... Some college
        #   16 ....... College grad
        #   17 ....... Post college (17+ years)
        #   97 ....... Other
        #   98 ....... DK (don't know); NA (not ascertained)
        #   99 ....... RF (refused)
        schooling = record[42:44]
        social_2002["EDUCATION"].append(
            np.nan if schooling in ("97", "98", "99") else schooling
        )

result_2002_four = pd.DataFrame(social_2002)
result_2002 = result_2002.merge(result_2002_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2002], axis=0)
# Fetch the 2004 data.
# Preload file (H04PR_R.da): birth year and sex.
preload_2004 = {"HHID": [], "PN": [], "BIRTH_YEAR": [], "SEX": []}
with open("/root/r_base/HRS/2004/data/H04PR_R.da", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for record in handle:
        preload_2004["HHID"].append(record[0:6])
        preload_2004["PN"].append(record[6:9])
        preload_2004["BIRTH_YEAR"].append(record[25:29])
        # 1. Male
        # 2. Female
        preload_2004["SEX"].append(record[20:21])

# Added last so WAVE stays the final column; the scalar is broadcast to all rows.
preload_2004["WAVE"] = 2004
result_2004_one = pd.DataFrame(preload_2004)
# 2004 section C (H04C_R.da): smoking, drinking, BMI, heart problems, stroke
# and physical activity. The result is left-joined onto the preload frame as
# result_2004.
health_2004 = {
    "HHID": [],
    "PN": [],
    "SMOKED": [],
    "DRINKED": [],
    "BMI": [],
    "HEART_PROBL": [],
    "STROKE": [],
    "PHYSICAL_ACTIVITY_LEVEL": [],
}
with open("/root/r_base/HRS/2004/data/H04C_R.da", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for line in handle:
        health_2004["HHID"].append(line[0:6])
        health_2004["PN"].append(line[6:9])

        # SMOKED: 1. YES / 5. NO; anything else is missing.
        smoked = line[167:168]
        health_2004["SMOKED"].append(smoked if smoked in ("1", "5") else np.nan)

        # DRINKED raw codes:
        #   1. YES
        #   3. [VOL] NEVER HAVE USED ALCOHOL
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # After recoding: 1 = yes; 5 = no (codes 3 and 5); the rest missing.
        drink_raw = line[192:193]
        if drink_raw == "1":
            drinked = "1"
        elif drink_raw in ("3", "5"):
            drinked = "5"
        else:
            drinked = np.nan
        health_2004["DRINKED"].append(drinked)

        # Weight: pounds converted to kilograms. Values above 400 are treated
        # as missing. A blank field is also missing (it used to crash float()).
        weight_raw = line[203:206].strip()
        if weight_raw and float(weight_raw) <= 400:
            weight_kg = float(weight_raw) * 0.45359237
        else:
            weight_kg = np.nan

        # Height: feet (one column) plus inches (two columns), in metres.
        # Feet 8/9 or blank means no BMI. Inches 98/99 or blank falls back to
        # the feet-only height. The inches slice is stripped first, because
        # the old `== ""` test never matched a space-filled field.
        feet_raw = line[222:223].strip()
        inches_raw = line[223:225].strip()
        bmi = np.nan
        if feet_raw not in ("", "8", "9") and not pd.isna(weight_kg):
            height_m = float(feet_raw) * 0.3048
            if inches_raw not in ("", "98", "99"):
                height_m += float(inches_raw) * 0.0254
            # Guard against a zero height, which would divide by zero.
            if height_m > 0:
                bmi = weight_kg / math.pow(height_m, 2)
        health_2004["BMI"].append(bmi)

        # Heart problems (heart attack, coronary heart disease, angina,
        # congestive heart failure, or other heart problems). Raw codes:
        #   1. YES
        #   3. DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
        #   4. DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # Recoded to 1 (yes: 1, 3) / 5 (no: 4, 5). Every other value,
        # including a blank field, is missing. The old catch-all "else"
        # turned blanks into "5".
        heart_raw = line[69:70]
        if heart_raw in ("1", "3"):
            heart_probl = "1"
        elif heart_raw in ("4", "5"):
            heart_probl = "5"
        else:
            heart_probl = np.nan
        health_2004["HEART_PROBL"].append(heart_probl)

        # Stroke raw codes:
        #   1. YES
        #   2. [VOL] POSSIBLE STROKE OR TIA (TRANSIENT ISCHEMIC ATTACK)
        #   3. DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
        #   4. DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # Recoded to 1 (yes: 1, 3) / 5 (no: 2, 4, 5). Every other value,
        # including a blank field, is missing. The old catch-all "else"
        # turned blanks into "1".
        stroke_raw = line[90:91]
        if stroke_raw in ("1", "3"):
            stroke = "1"
        elif stroke_raw in ("2", "4", "5"):
            stroke = "5"
        else:
            stroke = np.nan
        health_2004["STROKE"].append(stroke)

        # Physical activity. Raw frequency codes (both questions):
        #   1. MORE THAN ONCE A WEEK
        #   2. ONCE A WEEK
        #   3. ONCE TO THREE TIMES A MONTH
        #   4. HARDLY EVER OR NEVER
        #   7. (VOL) EVERY DAY
        #   8. DK (Don't Know)
        #   9. RF (Refused)
        # Target levels:
        #   3. vigorous (vigorous activity more than once a week)
        #   2. moderate (moderate activity more than once a week)
        #   1. inactive (the rest)
        vigorous_raw = line[164:165]
        if vigorous_raw in ("1", "2", "7"):
            vigorous = "3"
        elif vigorous_raw in ("3", "4"):
            vigorous = "1"
        else:
            vigorous = np.nan

        moderate_raw = line[165:166]
        if moderate_raw in ("1", "2", "7"):
            moderate = "2"
        elif moderate_raw in ("3", "4"):
            moderate = "1"
        else:
            moderate = np.nan

        # Vigorous wins over moderate; inactive needs at least one valid answer.
        if vigorous == "3":
            level = "3"
        elif moderate == "2":
            level = "2"
        elif vigorous == "1" or moderate == "1":
            level = "1"
        else:
            level = np.nan
        health_2004["PHYSICAL_ACTIVITY_LEVEL"].append(level)

result_2004_two = pd.DataFrame(health_2004)
result_2004 = pd.merge(result_2004_one, result_2004_two, on=["HHID", "PN"], how="left")
# 2004 section B (H04B_R.da): marital status and education. The result is
# merged into result_2004, which is then appended to result.
social_2004 = {"HHID": [], "PN": [], "MARITAL_STATUS": [], "EDUCATION": []}
with open("/root/r_base/HRS/2004/data/H04B_R.da", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for record in handle:
        social_2004["HHID"].append(record[0:6])
        social_2004["PN"].append(record[6:9])

        # Raw marital status codes:
        #   1. MARRIED (VOL)
        #   2. ANULLED (VOL)
        #   3. SEPARATED
        #   4. DIVORCED
        #   5. WIDOWED
        #   6. NEVER MARRIED
        #   7. OTHER (SPECIFY)
        # Recoded to: 1 = married or partner; 5 = other; anything else missing.
        marital_raw = record[161:162]
        if marital_raw == "1":
            marital_status = "1"
        elif marital_raw in ("2", "3", "4", "5", "6", "7"):
            marital_status = "5"
        else:
            marital_status = np.nan
        social_2004["MARITAL_STATUS"].append(marital_status)

        # Education codes:
        #   0 ........ No formal education
        #   1-11 ..... Grades
        #   12 ....... High school
        #   13-15 .... Some college
        #   16 ....... College grad
        #   17 ....... Post college (17+ years)
        #   97 ....... Other
        #   98 ....... DK (don't know); NA (not ascertained)
        #   99 ....... RF (refused)
        schooling = record[41:43]
        social_2004["EDUCATION"].append(
            np.nan if schooling in ("97", "98", "99") else schooling
        )

result_2004_four = pd.DataFrame(social_2004)
result_2004 = result_2004.merge(result_2004_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2004], axis=0)
# Fetch the 2006 data.
# Preload file (H06PR_R.da): birth year and sex.
preload_2006 = {"HHID": [], "PN": [], "BIRTH_YEAR": [], "SEX": []}
with open("/root/r_base/HRS/2006/data/H06PR_R.da", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for record in handle:
        preload_2006["HHID"].append(record[0:6])
        preload_2006["PN"].append(record[6:9])
        preload_2006["BIRTH_YEAR"].append(record[25:29])
        # 1. Male
        # 2. Female
        preload_2006["SEX"].append(record[20:21])

# Added last so WAVE stays the final column; the scalar is broadcast to all rows.
preload_2006["WAVE"] = 2006
result_2006_one = pd.DataFrame(preload_2006)
# 2006 section C (H06C_R.da): smoking, drinking, BMI, heart problems, stroke
# and physical activity. The result is left-joined onto the preload frame as
# result_2006.
health_2006 = {
    "HHID": [],
    "PN": [],
    "SMOKED": [],
    "DRINKED": [],
    "BMI": [],
    "HEART_PROBL": [],
    "STROKE": [],
    "PHYSICAL_ACTIVITY_LEVEL": [],
}
with open("/root/r_base/HRS/2006/data/H06C_R.da", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for line in handle:
        health_2006["HHID"].append(line[0:6])
        health_2006["PN"].append(line[6:9])

        # SMOKED: 1. YES / 5. NO; anything else is missing.
        smoked = line[181:182]
        health_2006["SMOKED"].append(smoked if smoked in ("1", "5") else np.nan)

        # DRINKED raw codes:
        #   1. YES
        #   3. [VOL] NEVER HAVE USED ALCOHOL
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # After recoding: 1 = yes; 5 = no (codes 3 and 5); the rest missing.
        drink_raw = line[207:208]
        if drink_raw == "1":
            drinked = "1"
        elif drink_raw in ("3", "5"):
            drinked = "5"
        else:
            drinked = np.nan
        health_2006["DRINKED"].append(drinked)

        # Weight: pounds converted to kilograms. Blank fields and values
        # above 400 are treated as missing.
        weight_raw = line[218:221].strip()
        if weight_raw and float(weight_raw) <= 400:
            weight_kg = float(weight_raw) * 0.45359237
        else:
            weight_kg = np.nan

        # Height: feet (one column) plus inches (two columns), in metres.
        # Both slices are stripped first: the old `== ""` tests ran on
        # unstripped slices, never matched a space-filled field, and let
        # float(" ") raise. Feet 8/9 or blank means no BMI. Inches 98/99 or
        # blank falls back to the feet-only height.
        feet_raw = line[252:253].strip()
        inches_raw = line[253:255].strip()
        bmi = np.nan
        if feet_raw not in ("", "8", "9") and not pd.isna(weight_kg):
            height_m = float(feet_raw) * 0.3048
            if inches_raw not in ("", "98", "99"):
                height_m += float(inches_raw) * 0.0254
            # Guard against a zero height, which would divide by zero.
            if height_m > 0:
                bmi = weight_kg / math.pow(height_m, 2)
        health_2006["BMI"].append(bmi)

        # Heart problems (heart attack, coronary heart disease, angina,
        # congestive heart failure, or other heart problems). Raw codes:
        #   1. YES
        #   3. DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
        #   4. DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # Recoded to 1 (yes: 1, 3) / 5 (no: 4, 5). Every other value,
        # including a space-filled field, is missing. The old `== ""` test
        # did not catch a single space, so blanks became "5".
        heart_raw = line[80:81]
        if heart_raw in ("1", "3"):
            heart_probl = "1"
        elif heart_raw in ("4", "5"):
            heart_probl = "5"
        else:
            heart_probl = np.nan
        health_2006["HEART_PROBL"].append(heart_probl)

        # Stroke raw codes:
        #   1. YES
        #   2. [VOL] POSSIBLE STROKE OR TIA (TRANSIENT ISCHEMIC ATTACK)
        #   3. DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
        #   4. DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # Recoded to 1 (yes: 1, 3) / 5 (no: 2, 4, 5). Every other value,
        # including a space-filled field, is missing. Blanks previously
        # became "1".
        stroke_raw = line[101:102]
        if stroke_raw in ("1", "3"):
            stroke = "1"
        elif stroke_raw in ("2", "4", "5"):
            stroke = "5"
        else:
            stroke = np.nan
        health_2006["STROKE"].append(stroke)

        # Physical activity. Raw frequency codes (both questions):
        #   1. MORE THAN ONCE A WEEK
        #   2. ONCE A WEEK
        #   3. ONCE TO THREE TIMES A MONTH
        #   4. HARDLY EVER OR NEVER
        #   7. (VOL) EVERY DAY
        #   8. DK (Don't Know)
        #   9. RF (Refused)
        # Target levels:
        #   3. vigorous (vigorous activity more than once a week)
        #   2. moderate (moderate activity more than once a week)
        #   1. inactive (the rest)
        vigorous_raw = line[178:179]
        if vigorous_raw in ("1", "2", "7"):
            vigorous = "3"
        elif vigorous_raw in ("3", "4"):
            vigorous = "1"
        else:
            vigorous = np.nan

        moderate_raw = line[179:180]
        if moderate_raw in ("1", "2", "7"):
            moderate = "2"
        elif moderate_raw in ("3", "4"):
            moderate = "1"
        else:
            moderate = np.nan

        # Vigorous wins over moderate; inactive needs at least one valid answer.
        if vigorous == "3":
            level = "3"
        elif moderate == "2":
            level = "2"
        elif vigorous == "1" or moderate == "1":
            level = "1"
        else:
            level = np.nan
        health_2006["PHYSICAL_ACTIVITY_LEVEL"].append(level)

result_2006_two = pd.DataFrame(health_2006)
result_2006 = pd.merge(result_2006_one, result_2006_two, on=["HHID", "PN"], how="left")
# 2006 section B (H06B_R.da): marital status and education. The result is
# merged into result_2006, which is then appended to result.
social_2006 = {"HHID": [], "PN": [], "MARITAL_STATUS": [], "EDUCATION": []}
with open("/root/r_base/HRS/2006/data/H06B_R.da", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for record in handle:
        social_2006["HHID"].append(record[0:6])
        social_2006["PN"].append(record[6:9])

        # Raw marital status codes:
        #   1. MARRIED (VOL)
        #   2. ANULLED (VOL)
        #   3. SEPARATED
        #   4. DIVORCED
        #   5. WIDOWED
        #   6. NEVER MARRIED
        #   7. OTHER (SPECIFY)
        #   8. DK (Don't Know); NA (Not Ascertained)
        #   9. RF (Refused)
        # Recoded to: 1 = married or partner; 5 = other; anything else missing.
        marital_raw = record[177:178]
        if marital_raw == "1":
            marital_status = "1"
        elif marital_raw in ("2", "3", "4", "5", "6", "7"):
            marital_status = "5"
        else:
            marital_status = np.nan
        social_2006["MARITAL_STATUS"].append(marital_status)

        # Education codes:
        #   0 ........ No formal education
        #   1-11 ..... Grades
        #   12 ....... High school
        #   13-15 .... Some college
        #   16 ....... College grad
        #   17 ....... Post college (17+ years)
        #   97 ....... Other
        #   98 ....... DK (don't know); NA (not ascertained)
        #   99 ....... RF (refused)
        schooling = record[47:49]
        social_2006["EDUCATION"].append(
            np.nan if schooling in ("97", "98", "99") else schooling
        )

result_2006_four = pd.DataFrame(social_2006)
result_2006 = result_2006.merge(result_2006_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2006], axis=0)
# Fetch the 2008 data.
# Preload file (H08PR_R.da): birth year and sex.
preload_2008 = {"HHID": [], "PN": [], "BIRTH_YEAR": [], "SEX": []}
with open("/root/r_base/HRS/2008/data/H08PR_R.da", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for record in handle:
        preload_2008["HHID"].append(record[0:6])
        preload_2008["PN"].append(record[6:9])
        preload_2008["BIRTH_YEAR"].append(record[25:29])
        # 1. Male
        # 2. Female
        preload_2008["SEX"].append(record[20:21])

# Added last so WAVE stays the final column; the scalar is broadcast to all rows.
preload_2008["WAVE"] = 2008
result_2008_one = pd.DataFrame(preload_2008)
# 2008 section C (H08C_R.da): smoking, drinking, BMI, heart problems, stroke
# and physical activity. The result is left-joined onto the preload frame as
# result_2008.
health_2008 = {
    "HHID": [],
    "PN": [],
    "SMOKED": [],
    "DRINKED": [],
    "BMI": [],
    "HEART_PROBL": [],
    "STROKE": [],
    "PHYSICAL_ACTIVITY_LEVEL": [],
}
with open("/root/r_base/HRS/2008/data/H08C_R.da", "r", encoding="utf-8") as handle:
    # Read the file line by line.
    for line in handle:
        health_2008["HHID"].append(line[0:6])
        health_2008["PN"].append(line[6:9])

        # SMOKED: 1. YES / 5. NO; anything else is missing.
        smoked = line[258:259]
        health_2008["SMOKED"].append(smoked if smoked in ("1", "5") else np.nan)

        # DRINKED raw codes:
        #   1. YES
        #   3. [VOL] NEVER HAVE USED ALCOHOL
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # After recoding: 1 = yes; 5 = no (codes 3 and 5); the rest missing.
        drink_raw = line[284:285]
        if drink_raw == "1":
            drinked = "1"
        elif drink_raw in ("3", "5"):
            drinked = "5"
        else:
            drinked = np.nan
        health_2008["DRINKED"].append(drinked)

        # Weight: pounds converted to kilograms. Values above 400 are treated
        # as missing. A blank field is also missing (it used to crash float()).
        weight_raw = line[295:298].strip()
        if weight_raw and float(weight_raw) <= 400:
            weight_kg = float(weight_raw) * 0.45359237
        else:
            weight_kg = np.nan

        # Height: feet (one column) plus inches (eight-column field), in
        # metres. Feet 8/9 or blank means no BMI. Inches "99998" or blank
        # falls back to the feet-only height.
        # NOTE(review): "99999" is assumed to be the refused counterpart of
        # the "99998" sentinel; confirm against the 2008 codebook.
        feet_raw = line[329:330].strip()
        inches_raw = line[330:338].strip()
        bmi = np.nan
        if feet_raw not in ("", "8", "9") and not pd.isna(weight_kg):
            height_m = float(feet_raw) * 0.3048
            if inches_raw not in ("", "99998", "99999"):
                height_m += float(inches_raw) * 0.0254
            # Guard against a zero height, which would divide by zero.
            if height_m > 0:
                bmi = weight_kg / math.pow(height_m, 2)
        health_2008["BMI"].append(bmi)

        # Heart problems (heart attack, coronary heart disease, angina,
        # congestive heart failure, or other heart problems). Raw codes:
        #   1. YES
        #   3. DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
        #   4. DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # Recoded to 1 (yes: 1, 3) / 5 (no: 4, 5). Every other value,
        # including a space-filled field, is missing. The old `== ""` test
        # did not catch a single space, so blanks became "5".
        heart_raw = line[128:129]
        if heart_raw in ("1", "3"):
            heart_probl = "1"
        elif heart_raw in ("4", "5"):
            heart_probl = "5"
        else:
            heart_probl = np.nan
        health_2008["HEART_PROBL"].append(heart_probl)

        # Stroke raw codes:
        #   1. YES
        #   2. [VOL] POSSIBLE STROKE OR TIA (TRANSIENT ISCHEMIC ATTACK)
        #   3. DISPUTES PREVIOUS WAVE RECORD, BUT NOW HAS CONDITION
        #   4. DISPUTES PREVIOUS WAVE RECORD, DOES NOT HAVE CONDITION
        #   5. NO
        #   8. DK (don't know); NA (not ascertained)
        #   9. RF (refused)
        # Recoded to 1 (yes: 1, 3) / 5 (no: 2, 4, 5). Every other value,
        # including a space-filled field, is missing. Blanks previously
        # became "1".
        stroke_raw = line[149:150]
        if stroke_raw in ("1", "3"):
            stroke = "1"
        elif stroke_raw in ("2", "4", "5"):
            stroke = "5"
        else:
            stroke = np.nan
        health_2008["STROKE"].append(stroke)

        # Physical activity. Raw frequency codes (both questions):
        #   1. MORE THAN ONCE A WEEK
        #   2. ONCE A WEEK
        #   3. ONCE TO THREE TIMES A MONTH
        #   4. HARDLY EVER OR NEVER
        #   7. (VOL) EVERY DAY
        #   8. DK (Don't Know)
        #   9. RF (Refused)
        # Target levels:
        #   3. vigorous (vigorous activity more than once a week)
        #   2. moderate (moderate activity more than once a week)
        #   1. inactive (the rest)
        vigorous_raw = line[255:256]
        if vigorous_raw in ("1", "2", "7"):
            vigorous = "3"
        elif vigorous_raw in ("3", "4"):
            vigorous = "1"
        else:
            vigorous = np.nan

        moderate_raw = line[256:257]
        if moderate_raw in ("1", "2", "7"):
            moderate = "2"
        elif moderate_raw in ("3", "4"):
            moderate = "1"
        else:
            moderate = np.nan

        # Vigorous wins over moderate; inactive needs at least one valid answer.
        if vigorous == "3":
            level = "3"
        elif moderate == "2":
            level = "2"
        elif vigorous == "1" or moderate == "1":
            level = "1"
        else:
            level = np.nan
        health_2008["PHYSICAL_ACTIVITY_LEVEL"].append(level)

result_2008_two = pd.DataFrame(health_2008)
result_2008 = pd.merge(result_2008_one, result_2008_two, on=["HHID", "PN"], how="left")
# Wave 2008 - section B file: marital status and years of education.
#
# Marital status codes in the raw file:
#   1 married, 2 annulled, 3 separated, 4 divorced, 5 widowed,
#   6 never married, 7 other, 8 DK/NA, 9 RF.
# They are collapsed to "1" (married) and "5" (any other valid answer);
# everything else becomes NaN.
marital_recode = {"1": "1", **dict.fromkeys(("2", "3", "4", "5", "6", "7"), "5")}
# Education: 0-17 are years of schooling and are kept as read;
# 97 (other), 98 (DK/NA), 99 (RF) and an empty slice become NaN.
education_missing = ("97", "98", "99", "")

household_ids, person_numbers = [], []
marital_values, education_values = [], []
with open("/root/r_base/HRS/2008/data/H08B_R.da", "r", encoding="utf-8") as b_file:
    for record in b_file:
        household_ids.append(record[0:6])
        person_numbers.append(record[6:9])
        marital_values.append(marital_recode.get(record[311:312], np.nan))
        schooling = record[48:50]
        education_values.append(np.nan if schooling in education_missing else schooling)

result_2008_four = pd.DataFrame(
    {
        "HHID": household_ids,
        "PN": person_numbers,
        "MARITAL_STATUS": marital_values,
        "EDUCATION": education_values,
    }
)
# Attach the section B columns to the wave table, then append the wave to
# the running multi-wave result.
result_2008 = result_2008.merge(result_2008_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2008], axis=0)
# Load the 2010 wave.
#
# Preload (PR) respondent file: one fixed-width record per person, from which
# the household id, person number, birth year and sex (1 = male, 2 = female)
# are sliced by column position.
pr_records = []
with open("/root/r_base/HRS/2010/data/H10PR_R.da", "r", encoding="utf-8") as pr_file:
    for record in pr_file:
        pr_records.append((record[0:6], record[6:9], record[25:29], record[20:21]))

result_2010_one = pd.DataFrame(
    {
        "HHID": [fields[0] for fields in pr_records],
        "PN": [fields[1] for fields in pr_records],
        "BIRTH_YEAR": [fields[2] for fields in pr_records],
        "SEX": [fields[3] for fields in pr_records],
        # Scalar wave label, repeated on every row by the DataFrame constructor.
        "WAVE": 2010,
    }
)
# Wave 2010 - section C (health) file: smoking, drinking, BMI, heart
# problems, stroke and physical-activity level for each respondent.
#
# Fields whose recode relies on a "blank means missing" test are stripped
# first: in a fixed-width record a blank answer is made of spaces, so the
# unstripped slice never equals "".
with open("/root/r_base/HRS/2010/data/H10C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    STROKE_list = []
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Smoking: 1 = yes, 5 = no; anything else is missing.
        SMOKED = line[250:251]
        SMOKED = SMOKED if SMOKED in ("1", "5") else np.nan

        # Drinking: 1 = yes; 3 (never used alcohol) and 5 (no) -> "5";
        # 8 (DK/NA), 9 (RF) and blank -> missing.
        DRINKED = line[276:277]
        DRINKED = "1" if DRINKED == "1" else "5" if DRINKED in ("3", "5") else np.nan

        # Weight in pounds -> kilograms; blank or above 400 is missing.
        WEIGH = line[287:290].strip()
        WEIGH = float(WEIGH) * 0.45359237 if WEIGH != "" and not float(WEIGH) > 400 else np.nan

        # Height = feet + inches, converted to metres.
        # NOTE(review): the inches slice is 8 characters wide while the
        # sentinel the original code tested for ("99998") has 5 digits -
        # confirm the column span against the 2010 codebook.
        HEIGHT_FEET = line[304:305].strip()
        HEIGHT_INCHES = line[305:313].strip()
        if HEIGHT_FEET not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT_FEET) * 0.3048
            # A blank or DK/RF inches value contributes nothing; the feet
            # component alone is used in that case. The sentinel is compared
            # numerically so padding and "98" vs "98.00" do not matter.
            if HEIGHT_INCHES != "" and float(HEIGHT_INCHES) not in (98.0, 99.0, 99998.0):
                HEIGHT = HEIGHT + float(HEIGHT_INCHES) * 0.0254
            BMI = WEIGH / math.pow(HEIGHT, 2)
        else:
            BMI = np.nan

        # Heart problems: 1 (yes) and 3 (disputes previous record, but now
        # has condition) -> "1"; 8, 9 and blank -> missing; 4 (disputes,
        # does not have condition), 5 (no) and anything else -> "5".
        HEART_PROBL = line[76:77].strip()
        HEART_PROBL = np.nan if HEART_PROBL in ("8", "9", "") else "1" if HEART_PROBL in ("1", "3") else "5"

        # Stroke: 8, 9 and blank -> missing; 2 (possible stroke or TIA),
        # 4 and 5 -> "5"; the remaining codes (1, 3) -> "1".
        # Without the strip a blank answer would be counted as "1".
        STROKE = line[139:140].strip()
        STROKE = np.nan if STROKE in ("8", "9", "") else "5" if STROKE in ("2", "4", "5") else "1"

        # Activity frequency: 1 more than once a week, 2 once a week,
        # 3 one to three times a month, 4 hardly ever or never,
        # 7 every day, 8 DK, 9 RF.
        # Vigorous activity: 1/2/7 -> "3" (vigorous), 3/4 -> "1", else missing.
        VIGOROUS_PHYSICAL = line[247:248]
        VIGOROUS_PHYSICAL = "3" if VIGOROUS_PHYSICAL in ("1", "2", "7") else "1" if VIGOROUS_PHYSICAL in ("3", "4") else np.nan
        # Moderate activity: 1/2/7 -> "2" (moderate), 3/4 -> "1", else missing.
        MODERATE_PHYSICAL = line[248:249]
        MODERATE_PHYSICAL = "2" if MODERATE_PHYSICAL in ("1", "2", "7") else "1" if MODERATE_PHYSICAL in ("3", "4") else np.nan
        # Light activity is not read for this wave.
        LIGHT_PHYSICAL = np.nan
        # Overall level: 3 vigorous, 2 moderate, 1 inactive, NaN when
        # neither item has a usable answer.
        if VIGOROUS_PHYSICAL == "3":
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif MODERATE_PHYSICAL == "2":
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif VIGOROUS_PHYSICAL == "1" or MODERATE_PHYSICAL == "1":
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)

data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2010_two = pd.DataFrame(data)
# Left join keeps every respondent of the PR file, even without section C data.
result_2010 = pd.merge(result_2010_one, result_2010_two, on=["HHID", "PN"], how="left")
# Wave 2010 - section B file: marital status and years of education.
#
# Marital status codes in the raw file:
#   1 married, 2 annulled, 3 separated, 4 divorced, 5 widowed,
#   6 never married, 7 other, 8 DK/NA, 9 RF.
# They are collapsed to "1" (married) and "5" (any other valid answer);
# everything else becomes NaN.
marital_recode = {"1": "1", **dict.fromkeys(("2", "3", "4", "5", "6", "7"), "5")}
# Education: 0-17 are years of schooling and are kept as read;
# 97 (other), 98 (DK/NA), 99 (RF) and an empty slice become NaN.
education_missing = ("97", "98", "99", "")

household_ids, person_numbers = [], []
marital_values, education_values = [], []
with open("/root/r_base/HRS/2010/data/H10B_R.da", "r", encoding="utf-8") as b_file:
    for record in b_file:
        household_ids.append(record[0:6])
        person_numbers.append(record[6:9])
        marital_values.append(marital_recode.get(record[305:306], np.nan))
        schooling = record[48:50]
        education_values.append(np.nan if schooling in education_missing else schooling)

result_2010_four = pd.DataFrame(
    {
        "HHID": household_ids,
        "PN": person_numbers,
        "MARITAL_STATUS": marital_values,
        "EDUCATION": education_values,
    }
)
# Attach the section B columns to the wave table, then append the wave to
# the running multi-wave result.
result_2010 = result_2010.merge(result_2010_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2010], axis=0)
# Load the 2012 wave.
#
# Preload (PR) respondent file: household id, person number, birth year and
# sex (1 = male, 2 = female) are sliced by column position. An empty
# birth-year or sex slice is stored as NaN.
pr_records = []
with open("/root/r_base/HRS/2012/data/H12PR_R.da", "r", encoding="utf-8") as pr_file:
    for record in pr_file:
        pr_records.append(
            (record[0:6], record[6:9], record[25:29] or np.nan, record[20:21] or np.nan)
        )

result_2012_one = pd.DataFrame(
    {
        "HHID": [fields[0] for fields in pr_records],
        "PN": [fields[1] for fields in pr_records],
        "BIRTH_YEAR": [fields[2] for fields in pr_records],
        "SEX": [fields[3] for fields in pr_records],
        # Scalar wave label, repeated on every row by the DataFrame constructor.
        "WAVE": 2012,
    }
)
# Wave 2012 - section C (health) file: smoking, drinking, BMI, heart
# problems, stroke and physical-activity level for each respondent.
#
# Fields whose recode relies on a "blank means missing" test are stripped
# first: in a fixed-width record a blank answer is made of spaces, so the
# unstripped slice never equals "".
with open("/root/r_base/HRS/2012/data/H12C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    STROKE_list = []
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Smoking: 1 = yes, 5 = no; anything else is missing.
        SMOKED = line[249:250]
        SMOKED = SMOKED if SMOKED in ("1", "5") else np.nan

        # Drinking: 1 = yes; 3 (never used alcohol) and 5 (no) -> "5";
        # 8 (DK/NA), 9 (RF) and blank -> missing.
        DRINKED = line[276:277]
        DRINKED = "1" if DRINKED == "1" else "5" if DRINKED in ("3", "5") else np.nan

        # Weight in pounds -> kilograms; blank or above 400 is missing.
        WEIGH = line[287:290].strip()
        WEIGH = float(WEIGH) * 0.45359237 if WEIGH != "" and not float(WEIGH) > 400 else np.nan

        # Height = feet + inches, converted to metres.
        HEIGHT_FEET = line[302:303].strip()
        HEIGHT_INCHES = line[303:308].strip()
        if HEIGHT_FEET not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT_FEET) * 0.3048
            # A blank or DK/RF (98/99) inches value contributes nothing; the
            # feet component alone is used in that case. The sentinel is
            # compared numerically so padding and "98" vs "98.00" do not
            # matter (the 5-character slice never equalled "98" as a string).
            if HEIGHT_INCHES != "" and float(HEIGHT_INCHES) not in (98.0, 99.0):
                HEIGHT = HEIGHT + float(HEIGHT_INCHES) * 0.0254
            BMI = WEIGH / math.pow(HEIGHT, 2)
        else:
            BMI = np.nan

        # Heart problems: 1 (yes) and 3 (disputes previous record, but now
        # has condition) -> "1"; 8, 9 and blank -> missing; 4 (disputes,
        # does not have condition), 5 (no) and anything else -> "5".
        HEART_PROBL = line[82:83].strip()
        HEART_PROBL = np.nan if HEART_PROBL in ("8", "9", "") else "1" if HEART_PROBL in ("1", "3") else "5"

        # Stroke: 8, 9 and blank -> missing; 2 (possible stroke or TIA),
        # 4 and 5 -> "5"; the remaining codes (1, 3) -> "1".
        # Without the strip a blank answer would be counted as "1".
        STROKE = line[146:147].strip()
        STROKE = np.nan if STROKE in ("8", "9", "") else "5" if STROKE in ("2", "4", "5") else "1"

        # Activity frequency: 1 more than once a week, 2 once a week,
        # 3 one to three times a month, 4 hardly ever or never,
        # 7 every day, 8 DK, 9 RF.
        # Vigorous activity: 1/2/7 -> "3" (vigorous), 3/4 -> "1", else missing.
        VIGOROUS_PHYSICAL = line[246:247]
        VIGOROUS_PHYSICAL = "3" if VIGOROUS_PHYSICAL in ("1", "2", "7") else "1" if VIGOROUS_PHYSICAL in ("3", "4") else np.nan
        # Moderate activity: 1/2/7 -> "2" (moderate), 3/4 -> "1", else missing.
        MODERATE_PHYSICAL = line[247:248]
        MODERATE_PHYSICAL = "2" if MODERATE_PHYSICAL in ("1", "2", "7") else "1" if MODERATE_PHYSICAL in ("3", "4") else np.nan
        # Light activity is not read for this wave.
        LIGHT_PHYSICAL = np.nan
        # Overall level: 3 vigorous, 2 moderate, 1 inactive, NaN when
        # neither item has a usable answer.
        if VIGOROUS_PHYSICAL == "3":
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif MODERATE_PHYSICAL == "2":
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif VIGOROUS_PHYSICAL == "1" or MODERATE_PHYSICAL == "1":
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)

data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2012_two = pd.DataFrame(data)
# Left join keeps every respondent of the PR file, even without section C data.
result_2012 = pd.merge(result_2012_one, result_2012_two, on=["HHID", "PN"], how="left")
# Wave 2012 - section B file: marital status and years of education.
#
# Marital status codes in the raw file:
#   1 married, 2 annulled, 3 separated, 4 divorced, 5 widowed,
#   6 never married, 7 other, 8 DK/NA, 9 RF.
# They are collapsed to "1" (married) and "5" (any other valid answer);
# everything else becomes NaN.
marital_recode = {"1": "1", **dict.fromkeys(("2", "3", "4", "5", "6", "7"), "5")}
# Education: 0-17 are years of schooling and are kept as read;
# 97 (other), 98 (DK/NA), 99 (RF) and an empty slice become NaN.
education_missing = ("97", "98", "99", "")

household_ids, person_numbers = [], []
marital_values, education_values = [], []
with open("/root/r_base/HRS/2012/data/H12B_R.da", "r", encoding="utf-8") as b_file:
    for record in b_file:
        household_ids.append(record[0:6])
        person_numbers.append(record[6:9])
        marital_values.append(marital_recode.get(record[294:295], np.nan))
        schooling = record[48:50]
        education_values.append(np.nan if schooling in education_missing else schooling)

result_2012_four = pd.DataFrame(
    {
        "HHID": household_ids,
        "PN": person_numbers,
        "MARITAL_STATUS": marital_values,
        "EDUCATION": education_values,
    }
)
# Attach the section B columns to the wave table, then append the wave to
# the running multi-wave result.
result_2012 = result_2012.merge(result_2012_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2012], axis=0)
# Load the 2014 wave.
#
# Preload (PR) respondent file: household id, person number, birth year and
# sex (1 = male, 2 = female) are sliced by column position. An empty
# birth-year or sex slice is stored as NaN.
pr_records = []
with open("/root/r_base/HRS/2014/data/H14PR_R.da", "r", encoding="utf-8") as pr_file:
    for record in pr_file:
        pr_records.append(
            (record[0:6], record[6:9], record[25:29] or np.nan, record[20:21] or np.nan)
        )

result_2014_one = pd.DataFrame(
    {
        "HHID": [fields[0] for fields in pr_records],
        "PN": [fields[1] for fields in pr_records],
        "BIRTH_YEAR": [fields[2] for fields in pr_records],
        "SEX": [fields[3] for fields in pr_records],
        # Scalar wave label, repeated on every row by the DataFrame constructor.
        "WAVE": 2014,
    }
)
# Wave 2014 - section C (health) file: smoking, drinking, BMI, heart
# problems, stroke and physical-activity level for each respondent.
#
# Fields whose recode relies on a "blank means missing" test are stripped
# first: in a fixed-width record a blank answer is made of spaces, so the
# unstripped slice never equals "".
with open("/root/r_base/HRS/2014/data/H14C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    STROKE_list = []
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Smoking: 1 = yes, 5 = no; anything else is missing.
        SMOKED = line[214:215]
        SMOKED = SMOKED if SMOKED in ("1", "5") else np.nan

        # Drinking: 1 = yes; 3 (never used alcohol) and 5 (no) -> "5";
        # 8 (DK/NA), 9 (RF) and blank -> missing.
        DRINKED = line[239:240]
        DRINKED = "1" if DRINKED == "1" else "5" if DRINKED in ("3", "5") else np.nan

        # Weight in pounds -> kilograms; blank or above 400 is missing.
        WEIGH = line[250:253].strip()
        WEIGH = float(WEIGH) * 0.45359237 if WEIGH != "" and not float(WEIGH) > 400 else np.nan

        # Height = feet + inches, converted to metres.
        HEIGHT_FEET = line[259:260].strip()
        HEIGHT_INCHES = line[260:265].strip()
        if HEIGHT_FEET not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT_FEET) * 0.3048
            # A blank or DK/RF (98/99) inches value contributes nothing; the
            # feet component alone is used in that case. The sentinel is
            # compared numerically so padding and "98" vs "98.00" do not
            # matter (the 5-character slice never equalled "98" as a string).
            if HEIGHT_INCHES != "" and float(HEIGHT_INCHES) not in (98.0, 99.0):
                HEIGHT = HEIGHT + float(HEIGHT_INCHES) * 0.0254
            BMI = WEIGH / math.pow(HEIGHT, 2)
        else:
            BMI = np.nan

        # Heart problems: 1 (yes) and 3 (disputes previous record, but now
        # has condition) -> "1"; 8, 9 and blank -> missing; 4 (disputes,
        # does not have condition), 5 (no) and anything else -> "5".
        HEART_PROBL = line[66:67].strip()
        HEART_PROBL = np.nan if HEART_PROBL in ("8", "9", "") else "1" if HEART_PROBL in ("1", "3") else "5"

        # Stroke: 8, 9 and blank -> missing; 2 (possible stroke or TIA),
        # 4 and 5 -> "5"; the remaining codes (1, 3) -> "1".
        # Without the strip a blank answer would be counted as "1".
        STROKE = line[122:123].strip()
        STROKE = np.nan if STROKE in ("8", "9", "") else "5" if STROKE in ("2", "4", "5") else "1"

        # Activity frequency: 1 more than once a week, 2 once a week,
        # 3 one to three times a month, 4 hardly ever or never,
        # 7 every day, 8 DK, 9 RF.
        # Vigorous activity: 1/2/7 -> "3" (vigorous), 3/4 -> "1", else missing.
        VIGOROUS_PHYSICAL = line[211:212]
        VIGOROUS_PHYSICAL = "3" if VIGOROUS_PHYSICAL in ("1", "2", "7") else "1" if VIGOROUS_PHYSICAL in ("3", "4") else np.nan
        # Moderate activity: 1/2/7 -> "2" (moderate), 3/4 -> "1", else missing.
        MODERATE_PHYSICAL = line[212:213]
        MODERATE_PHYSICAL = "2" if MODERATE_PHYSICAL in ("1", "2", "7") else "1" if MODERATE_PHYSICAL in ("3", "4") else np.nan
        # Light activity is not read for this wave.
        LIGHT_PHYSICAL = np.nan
        # Overall level: 3 vigorous, 2 moderate, 1 inactive, NaN when
        # neither item has a usable answer.
        if VIGOROUS_PHYSICAL == "3":
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif MODERATE_PHYSICAL == "2":
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif VIGOROUS_PHYSICAL == "1" or MODERATE_PHYSICAL == "1":
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)

data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2014_two = pd.DataFrame(data)
# Left join keeps every respondent of the PR file, even without section C data.
result_2014 = pd.merge(result_2014_one, result_2014_two, on=["HHID", "PN"], how="left")
# Wave 2014 - section B file: marital status and years of education.
#
# Marital status codes in the raw file:
#   1 married, 2 annulled, 3 separated, 4 divorced, 5 widowed,
#   6 never married, 7 other, 8 DK/NA, 9 RF.
# They are collapsed to "1" (married) and "5" (any other valid answer);
# everything else becomes NaN.
marital_recode = {"1": "1", **dict.fromkeys(("2", "3", "4", "5", "6", "7"), "5")}
# Education: 0-17 are years of schooling and are kept as read;
# 97 (other), 98 (DK/NA), 99 (RF) and an empty slice become NaN.
education_missing = ("97", "98", "99", "")

household_ids, person_numbers = [], []
marital_values, education_values = [], []
with open("/root/r_base/HRS/2014/data/H14B_R.da", "r", encoding="utf-8") as b_file:
    for record in b_file:
        household_ids.append(record[0:6])
        person_numbers.append(record[6:9])
        marital_values.append(marital_recode.get(record[274:275], np.nan))
        schooling = record[39:41]
        education_values.append(np.nan if schooling in education_missing else schooling)

result_2014_four = pd.DataFrame(
    {
        "HHID": household_ids,
        "PN": person_numbers,
        "MARITAL_STATUS": marital_values,
        "EDUCATION": education_values,
    }
)
# Attach the section B columns to the wave table, then append the wave to
# the running multi-wave result.
result_2014 = result_2014.merge(result_2014_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2014], axis=0)
# Load the 2016 wave.
#
# Preload (PR) respondent file: household id, person number, birth year and
# sex (1 = male, 2 = female) are sliced by column position. An empty
# birth-year or sex slice is stored as NaN.
pr_records = []
with open("/root/r_base/HRS/2016/data/H16PR_R.da", "r", encoding="utf-8") as pr_file:
    for record in pr_file:
        pr_records.append(
            (record[0:6], record[6:9], record[25:29] or np.nan, record[20:21] or np.nan)
        )

result_2016_one = pd.DataFrame(
    {
        "HHID": [fields[0] for fields in pr_records],
        "PN": [fields[1] for fields in pr_records],
        "BIRTH_YEAR": [fields[2] for fields in pr_records],
        "SEX": [fields[3] for fields in pr_records],
        # Scalar wave label, repeated on every row by the DataFrame constructor.
        "WAVE": 2016,
    }
)
# Wave 2016 - section C (health) file: smoking, drinking, BMI, heart
# problems, stroke and physical-activity level for each respondent.
#
# Fields whose recode relies on a "blank means missing" test are stripped
# first: in a fixed-width record a blank answer is made of spaces, so the
# unstripped slice never equals "".
with open("/root/r_base/HRS/2016/data/H16C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    STROKE_list = []
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Smoking: 1 = yes, 5 = no; anything else is missing.
        SMOKED = line[237:238]
        SMOKED = SMOKED if SMOKED in ("1", "5") else np.nan

        # Drinking: 1 = yes; 3 (never used alcohol) and 5 (no) -> "5";
        # 8 (DK/NA), 9 (RF) and blank -> missing.
        DRINKED = line[262:263]
        DRINKED = "1" if DRINKED == "1" else "5" if DRINKED in ("3", "5") else np.nan

        # Weight in pounds -> kilograms; blank or above 400 is missing.
        WEIGH = line[273:276].strip()
        WEIGH = float(WEIGH) * 0.45359237 if WEIGH != "" and not float(WEIGH) > 400 else np.nan

        # Height = feet + inches, converted to metres.
        HEIGHT_FEET = line[282:283].strip()
        HEIGHT_INCHES = line[283:288].strip()
        if HEIGHT_FEET not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT_FEET) * 0.3048
            # A blank or DK/RF (98/99) inches value contributes nothing; the
            # feet component alone is used in that case. The sentinel is
            # compared numerically so padding and "98" vs "98.00" do not
            # matter (the 5-character slice never equalled "98" as a string).
            if HEIGHT_INCHES != "" and float(HEIGHT_INCHES) not in (98.0, 99.0):
                HEIGHT = HEIGHT + float(HEIGHT_INCHES) * 0.0254
            BMI = WEIGH / math.pow(HEIGHT, 2)
        else:
            BMI = np.nan

        # Heart problems: 1 (yes) and 3 (disputes previous record, but now
        # has condition) -> "1"; 8, 9 and blank -> missing; 4 (disputes,
        # does not have condition), 5 (no) and anything else -> "5".
        HEART_PROBL = line[67:68].strip()
        HEART_PROBL = np.nan if HEART_PROBL in ("8", "9", "") else "1" if HEART_PROBL in ("1", "3") else "5"

        # Stroke: 8, 9 and blank -> missing; 2 (possible stroke or TIA),
        # 4 and 5 -> "5"; the remaining codes (1, 3) -> "1".
        # Without the strip a blank answer would be counted as "1".
        STROKE = line[123:124].strip()
        STROKE = np.nan if STROKE in ("8", "9", "") else "5" if STROKE in ("2", "4", "5") else "1"

        # Activity frequency: 1 more than once a week, 2 once a week,
        # 3 one to three times a month, 4 hardly ever or never,
        # 7 every day, 8 DK, 9 RF.
        # Vigorous activity: 1/2/7 -> "3" (vigorous), 3/4 -> "1", else missing.
        VIGOROUS_PHYSICAL = line[234:235]
        VIGOROUS_PHYSICAL = "3" if VIGOROUS_PHYSICAL in ("1", "2", "7") else "1" if VIGOROUS_PHYSICAL in ("3", "4") else np.nan
        # Moderate activity: 1/2/7 -> "2" (moderate), 3/4 -> "1", else missing.
        MODERATE_PHYSICAL = line[235:236]
        MODERATE_PHYSICAL = "2" if MODERATE_PHYSICAL in ("1", "2", "7") else "1" if MODERATE_PHYSICAL in ("3", "4") else np.nan
        # Light activity is not read for this wave.
        LIGHT_PHYSICAL = np.nan
        # Overall level: 3 vigorous, 2 moderate, 1 inactive, NaN when
        # neither item has a usable answer.
        if VIGOROUS_PHYSICAL == "3":
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif MODERATE_PHYSICAL == "2":
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif VIGOROUS_PHYSICAL == "1" or MODERATE_PHYSICAL == "1":
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)

data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2016_two = pd.DataFrame(data)
# Left join keeps every respondent of the PR file, even without section C data.
result_2016 = pd.merge(result_2016_one, result_2016_two, on=["HHID", "PN"], how="left")
# Wave 2016 - section B file: marital status and years of education.
#
# Marital status codes in the raw file:
#   1 married, 2 annulled, 3 separated, 4 divorced, 5 widowed,
#   6 never married, 7 other, 8 DK/NA, 9 RF.
# They are collapsed to "1" (married) and "5" (any other valid answer);
# everything else becomes NaN.
marital_recode = {"1": "1", **dict.fromkeys(("2", "3", "4", "5", "6", "7"), "5")}
# Education: 0-17 are years of schooling and are kept as read;
# 97 (other), 98 (DK/NA), 99 (RF) and an empty slice become NaN.
education_missing = ("97", "98", "99", "")

household_ids, person_numbers = [], []
marital_values, education_values = [], []
with open("/root/r_base/HRS/2016/data/H16B_R.da", "r", encoding="utf-8") as b_file:
    for record in b_file:
        household_ids.append(record[0:6])
        person_numbers.append(record[6:9])
        marital_values.append(marital_recode.get(record[275:276], np.nan))
        schooling = record[39:41]
        education_values.append(np.nan if schooling in education_missing else schooling)

result_2016_four = pd.DataFrame(
    {
        "HHID": household_ids,
        "PN": person_numbers,
        "MARITAL_STATUS": marital_values,
        "EDUCATION": education_values,
    }
)
# Attach the section B columns to the wave table, then append the wave to
# the running multi-wave result.
result_2016 = result_2016.merge(result_2016_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2016], axis=0)
# Load the 2018 wave.
#
# Preload (PR) respondent file: household id, person number, birth year and
# sex (1 = male, 2 = female) are sliced by column position. An empty
# birth-year or sex slice is stored as NaN.
pr_records = []
with open("/root/r_base/HRS/2018/data/H18PR_R.da", "r", encoding="utf-8") as pr_file:
    for record in pr_file:
        pr_records.append(
            (record[0:6], record[6:9], record[25:29] or np.nan, record[20:21] or np.nan)
        )

result_2018_one = pd.DataFrame(
    {
        "HHID": [fields[0] for fields in pr_records],
        "PN": [fields[1] for fields in pr_records],
        "BIRTH_YEAR": [fields[2] for fields in pr_records],
        "SEX": [fields[3] for fields in pr_records],
        # Scalar wave label, repeated on every row by the DataFrame constructor.
        "WAVE": 2018,
    }
)
# Wave 2018 - section C (health) file: smoking, drinking, BMI, heart
# problems, stroke and physical-activity level for each respondent.
#
# Several 2018 fields are read as two-character slices (they can hold the
# web non-response code "-8"). Those slices are stripped before being
# compared with one-character codes; without the strip a padded value such
# as " 1" never matches "1" and every answer lands in the fallback branch.
with open("/root/r_base/HRS/2018/data/H18C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    STROKE_list = []
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Smoking: 1 = yes, 5 = no; anything else is missing.
        # NOTE(review): this is a one-character slice while neighbouring
        # 2018 fields are two characters wide - confirm the column span.
        SMOKED = line[381:382]
        SMOKED = SMOKED if SMOKED in ("1", "5") else np.nan

        # Drinking: 1 = yes; 3 (never used alcohol) and 5 (no) -> "5";
        # 8 (DK/NA), 9 (RF), -8 and blank -> missing.
        DRINKED = line[404:406].strip()
        DRINKED = "1" if DRINKED == "1" else "5" if DRINKED in ("3", "5") else np.nan

        # Weight in pounds -> kilograms; blank, negative or above 400 is missing.
        WEIGH = line[418:421].strip()
        WEIGH = float(WEIGH) * 0.45359237 if WEIGH != "" and not float(WEIGH) > 400 and not float(WEIGH) < 0 else np.nan

        # Height = feet + inches, converted to metres. "-8" (web
        # non-response) in the feet field is treated like DK/RF so that a
        # negative height is never used.
        HEIGHT_FEET = line[428:430].strip()
        HEIGHT_INCHES = line[430:435].strip()
        if HEIGHT_FEET not in ("", "8", "9", "-8") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT_FEET) * 0.3048
            # A blank, DK/RF (98/99) or web non-response (-8) inches value
            # contributes nothing; the feet component alone is used then.
            # The sentinel is compared numerically so "98" and "98.00"
            # are handled alike.
            if HEIGHT_INCHES != "" and float(HEIGHT_INCHES) not in (98.0, 99.0, -8.0):
                HEIGHT = HEIGHT + float(HEIGHT_INCHES) * 0.0254
            BMI = WEIGH / math.pow(HEIGHT, 2)
        else:
            BMI = np.nan

        # Heart problems (2018 codes): -8 web non-response, 1 yes,
        # 4 never had heart problem, 5 no, 6 had it before but not now and
        # not on medication, 8 DK/NA, 9 RF.
        # 1 -> "1"; 8, 9, -8 and blank -> missing; everything else -> "5".
        HEART_PROBL = line[86:88].strip()
        HEART_PROBL = np.nan if HEART_PROBL in ("8", "9", "", "-8") else "1" if HEART_PROBL == "1" else "5"

        # Stroke (2018 codes): -8 web non-response, 1 yes, 2 possible stroke
        # or TIA, 4 never had a stroke, 5 no, 8 DK/NA, 9 RF.
        # 8, 9, -8 and blank -> missing; 2, 4 and 5 -> "5"; the rest -> "1".
        STROKE = line[162:164].strip()
        STROKE = np.nan if STROKE in ("8", "9", "", "-8") else "5" if STROKE in ("2", "4", "5") else "1"

        # Activity frequency: 1 more than once a week, 2 once a week,
        # 3 one to three times a month, 4 hardly ever or never,
        # 7 every day, 8 DK, 9 RF.
        # Vigorous activity: 1/2/7 -> "3" (vigorous), 3/4 -> "1", else missing.
        VIGOROUS_PHYSICAL = line[367:369].strip()
        VIGOROUS_PHYSICAL = "3" if VIGOROUS_PHYSICAL in ("1", "2", "7") else "1" if VIGOROUS_PHYSICAL in ("3", "4") else np.nan
        # Moderate activity: 1/2/7 -> "2" (moderate), 3/4 -> "1", else missing.
        MODERATE_PHYSICAL = line[369:371].strip()
        MODERATE_PHYSICAL = "2" if MODERATE_PHYSICAL in ("1", "2", "7") else "1" if MODERATE_PHYSICAL in ("3", "4") else np.nan
        # Light activity is not read for this wave.
        LIGHT_PHYSICAL = np.nan
        # Overall level: 3 vigorous, 2 moderate, 1 inactive, NaN when
        # neither item has a usable answer.
        if VIGOROUS_PHYSICAL == "3":
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif MODERATE_PHYSICAL == "2":
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif VIGOROUS_PHYSICAL == "1" or MODERATE_PHYSICAL == "1":
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)

data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2018_two = pd.DataFrame(data)
# Left join keeps every respondent of the PR file, even without section C data.
result_2018 = pd.merge(result_2018_one, result_2018_two, on=["HHID", "PN"], how="left")
# Wave 2018 - section B file: marital status and years of education.
#
# Marital status codes in the raw file:
#   1 married, 2 annulled, 3 separated, 4 divorced, 5 widowed,
#   6 never married, 7 other, 8 DK/NA, 9 RF.
# They are collapsed to "1" (married) and "5" (any other valid answer);
# everything else becomes NaN.
marital_recode = {"1": "1", **dict.fromkeys(("2", "3", "4", "5", "6", "7"), "5")}
# Education: 0-17 are years of schooling and are kept as read;
# 97 (other), 98 (DK/NA), 99 (RF) and an empty slice become NaN.
education_missing = ("97", "98", "99", "")

household_ids, person_numbers = [], []
marital_values, education_values = [], []
with open("/root/r_base/HRS/2018/data/H18B_R.da", "r", encoding="utf-8") as b_file:
    for record in b_file:
        household_ids.append(record[0:6])
        person_numbers.append(record[6:9])
        marital_values.append(marital_recode.get(record[287:288], np.nan))
        schooling = record[41:43]
        education_values.append(np.nan if schooling in education_missing else schooling)

result_2018_four = pd.DataFrame(
    {
        "HHID": household_ids,
        "PN": person_numbers,
        "MARITAL_STATUS": marital_values,
        "EDUCATION": education_values,
    }
)
# Attach the section B columns to the wave table, then append the wave to
# the running multi-wave result.
result_2018 = result_2018.merge(result_2018_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2018], axis=0)
# Load the 2020 wave.
# File H20PR_R.da supplies birth year and sex; the resulting frame
# (result_2020_one) is tagged with WAVE = 2020.
with open("/root/r_base/HRS/2020/data/H20PR_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    BIRTH_YEAR_list = []
    SEX_list = []
    # Read the fixed-width file line by line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]
        # Strip before the emptiness test: a blank fixed-width field is made of
        # spaces, so comparing the raw slice with "" only caught truncated
        # lines and blank values were kept as whitespace strings.
        BIRTH_YEAR = line[36:40].strip() or np.nan
        # 1.Male
        # 2.Female
        SEX = line[33:34].strip() or np.nan
        HHID_list.append(HHID)
        PN_list.append(PN)
        BIRTH_YEAR_list.append(BIRTH_YEAR)
        SEX_list.append(SEX)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "BIRTH_YEAR": BIRTH_YEAR_list,
    "SEX": SEX_list,
}
# Scalar entry: pandas broadcasts it to every row of the frame.
data["WAVE"] = 2020
result_2020_one = pd.DataFrame(data)
# 2020 wave, file H20C_R.da: smoking, drinking, BMI, heart problems, stroke
# and physical-activity level. Builds result_2020_two and left-joins it onto
# result_2020_one on (HHID, PN) to form result_2020.
#
# Several fields below are two columns wide but are compared against
# one-character codes ("1", "5", ...). Each such slice is stripped first;
# without that, a two-character slice can never equal a one-character literal,
# so every comparison silently fell through to its default branch.
with open("/root/r_base/HRS/2020/data/H20C_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    SMOKED_list = []
    DRINKED_list = []
    BMI_list = []
    HEART_PROBL_list = []
    PHYSICAL_ACTIVITY_LEVEL_list = []
    STROKE_list = []
    # Read the fixed-width file line by line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]

        # Smoking:
        # 1. YES
        # 5. NO
        SMOKED = line[339:340]
        SMOKED = SMOKED if SMOKED in ("1", "5") else np.nan

        # Drinking, raw codes:
        # 1. YES
        # 3. [VOL] NEVER HAVE USED ALCOHOL
        # 5. NO
        # 8. DK (don't know); NA (not ascertained)
        # 9. RF (refused)
        # After recoding:
        # 1. Yes
        # 5. No [Inap in V502-V505]
        DRINKED = line[363:365].strip()
        if DRINKED == "1":
            DRINKED = "1"
        elif DRINKED in ("3", "5"):
            DRINKED = "5"
        else:
            DRINKED = np.nan

        # Weight: the raw value is multiplied by 0.45359237 (pounds -> kg);
        # blanks and values outside 0..400 are treated as missing.
        WEIGH = line[380:383].strip()
        if WEIGH and 0 <= float(WEIGH) <= 400:
            WEIGH = float(WEIGH) * 0.45359237
        else:
            WEIGH = np.nan

        # Height: feet and inches converted to metres, then BMI = kg / m^2.
        HEIGHT_FEET = line[389:390].strip()
        HEIGHT_INCHES = line[390:395].strip()
        BMI = np.nan
        if HEIGHT_FEET not in ("", "8", "9") and not pd.isna(WEIGH):
            HEIGHT = float(HEIGHT_FEET) * 0.3048
            if HEIGHT_INCHES:
                inches = float(HEIGHT_INCHES)
                # 98 was already excluded as a non-answer code; compare
                # numerically so "98.00"-style values are caught too.
                # NOTE(review): 99 is presumably the matching "refused" code
                # (feet uses 8/9) -- confirm against the codebook. Left in, it
                # would add about 2.5 m to the height.
                if inches not in (98.0, 99.0):
                    HEIGHT += inches * 0.0254
            # A zero height would raise ZeroDivisionError; report it as missing.
            if HEIGHT > 0:
                BMI = WEIGH / math.pow(HEIGHT, 2)

        # Heart problems, raw codes:
        # -8. Web non-response
        # 1. YES
        # 4. [NEVER HAD HEART PROBLEM]
        # 5. NO
        # 6. [HAD HEART PROBLEM BEFORE, BUT DO NOT HAVE IT NOW AND AM NOT
        #    TAKING MEDICATION FOR IT]
        # 8. DK (Don't Know); NA (Not Ascertained)
        # 9. RF (Refused)
        # A heart attack, coronary heart disease, angina, congestive heart
        # failure, or other heart problems. After recoding:
        # 1. Yes
        # 5. No [Inap in V418-V421]
        HEART_PROBL = line[73:75].strip()
        if HEART_PROBL in ("", "8", "9", "-8"):
            HEART_PROBL = np.nan
        elif HEART_PROBL == "1":
            HEART_PROBL = "1"
        else:
            HEART_PROBL = "5"

        # Stroke, raw codes:
        # -8. Web non-response
        # 1. YES
        # 2. [[VOL] POSSIBLE STROKE OR TIA (TRANSIENT ISCHEMIC
        #    ATTACK)/POSSIBLE STROKE OR TIA (TRANSIENT ISCHEMIC ATTACK)]
        # 4. [NEVER HAD A STROKE]
        # 5. NO
        # 8. DK (Don't Know); NA (Not Ascertained)
        # 9. RF (Refused)
        # After recoding:
        # 1. Yes
        # 5. No [Inap in V418-V421]
        STROKE = line[138:140].strip()
        if STROKE in ("", "8", "9", "-8"):
            STROKE = np.nan
        elif STROKE in ("2", "4", "5"):
            STROKE = "5"
        else:
            STROKE = "1"

        # Physical activity, raw frequency codes:
        # 1. MORE THAN ONCE A WEEK
        # 2. ONCE A WEEK
        # 3. ONCE TO THREE TIMES A MONTH
        # 4. HARDLY EVER OR NEVER
        # 7. (VOL) EVERY DAY
        # 8. DK (Don't Know)
        # 9. RF (Refused)
        # Derived level:
        # 3. vigorous (vigorous activity more than once a week)
        # 2. moderate (moderate activity more than once a week)
        # 1. inactive (the rest)
        # Vigorous activity
        VIGOROUS_PHYSICAL = line[325:327].strip()
        if VIGOROUS_PHYSICAL in ("1", "2", "7"):
            VIGOROUS_PHYSICAL = "3"
        elif VIGOROUS_PHYSICAL in ("3", "4"):
            VIGOROUS_PHYSICAL = "1"
        else:
            VIGOROUS_PHYSICAL = np.nan
        # Moderate activity
        MODERATE_PHYSICAL = line[327:329].strip()
        if MODERATE_PHYSICAL in ("1", "2", "7"):
            MODERATE_PHYSICAL = "2"
        elif MODERATE_PHYSICAL in ("3", "4"):
            MODERATE_PHYSICAL = "1"
        else:
            MODERATE_PHYSICAL = np.nan
        # Vigorous wins over moderate; "inactive" needs at least one answered
        # item; NaN when neither item was answered.
        if VIGOROUS_PHYSICAL == "3":
            PHYSICAL_ACTIVITY_LEVEL = "3"
        elif MODERATE_PHYSICAL == "2":
            PHYSICAL_ACTIVITY_LEVEL = "2"
        elif VIGOROUS_PHYSICAL == "1" or MODERATE_PHYSICAL == "1":
            PHYSICAL_ACTIVITY_LEVEL = "1"
        else:
            PHYSICAL_ACTIVITY_LEVEL = np.nan

        HHID_list.append(HHID)
        PN_list.append(PN)
        PHYSICAL_ACTIVITY_LEVEL_list.append(PHYSICAL_ACTIVITY_LEVEL)
        SMOKED_list.append(SMOKED)
        DRINKED_list.append(DRINKED)
        BMI_list.append(BMI)
        HEART_PROBL_list.append(HEART_PROBL)
        STROKE_list.append(STROKE)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "SMOKED": SMOKED_list,
    "DRINKED": DRINKED_list,
    "BMI": BMI_list,
    "HEART_PROBL": HEART_PROBL_list,
    "STROKE": STROKE_list,
    "PHYSICAL_ACTIVITY_LEVEL": PHYSICAL_ACTIVITY_LEVEL_list,
}
result_2020_two = pd.DataFrame(data)
result_2020 = pd.merge(result_2020_one, result_2020_two, on=["HHID", "PN"], how="left")
# 2020 wave, file H20B_R.da: marital status and years of education.
# Builds result_2020_four, left-joins it onto result_2020 on (HHID, PN),
# appends the finished 2020 frame to `result`, and writes the combined table
# to CSV.
with open("/root/r_base/HRS/2020/data/H20B_R.da", "r", encoding="utf-8") as file:
    HHID_list = []
    PN_list = []
    MARITAL_STATUS_list = []
    EDUCATION_list = []
    # Read the fixed-width file line by line.
    for line in file:
        HHID = line[0:6]
        PN = line[6:9]
        # Raw marital-status codes:
        # 1. MARRIED (VOL)
        # 2. ANULLED (VOL)
        # 3. SEPARATED
        # 4. DIVORCED
        # 5. WIDOWED
        # 6. NEVER MARRIED
        # 7. OTHER (SPECIFY)
        # 8. DK (Don't Know); NA (Not Ascertained)
        # 9. RF (Refused)
        MARITAL_STATUS = line[304:305]
        # Recode: 1 = married or partner; 5 = other; anything else -> NaN.
        if MARITAL_STATUS == "1":
            MARITAL_STATUS = "1"
        elif MARITAL_STATUS in ("2", "3", "4", "5", "6", "7"):
            MARITAL_STATUS = "5"
        else:
            MARITAL_STATUS = np.nan
        # Years of education:
        # 0 For no formal education
        # 1-11 .....Grades
        # 12 .......High school
        # 13-15 ....Some college
        # 16 .......College grad
        # 17 .......Post college (17+ years)
        # 97 .......Other
        # 98. DK (don't know); NA (not ascertained)
        # 99. RF (refused)
        # Strip before testing: a blank two-column field is whitespace, not "",
        # so the unstripped slice could never hit the empty-string check and
        # blanks leaked through as data. Stripping also normalises padded
        # single-digit values (e.g. " 8" -> "8").
        EDUCATION = line[40:42].strip()
        EDUCATION = np.nan if EDUCATION in ("", "97", "98", "99") else EDUCATION
        HHID_list.append(HHID)
        PN_list.append(PN)
        MARITAL_STATUS_list.append(MARITAL_STATUS)
        EDUCATION_list.append(EDUCATION)
data = {
    "HHID": HHID_list,
    "PN": PN_list,
    "MARITAL_STATUS": MARITAL_STATUS_list,
    "EDUCATION": EDUCATION_list,
}
result_2020_four = pd.DataFrame(data)
result_2020 = pd.merge(result_2020, result_2020_four, on=["HHID", "PN"], how="left")
result = pd.concat([result, result_2020], axis=0)
# Persist every wave collected so far as a single CSV, without the row index.
result.to_csv("/root/r_base/HRS/result_all.csv", index=False)
|