ruby-upb.c 258 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913
  1. /* Amalgamated source file */
  2. #include "ruby-upb.h"
  3. /*
  4. * This is where we define macros used across upb.
  5. *
  6. * All of these macros are undef'd in port_undef.inc to avoid leaking them to
  7. * users.
  8. *
  9. * The correct usage is:
  10. *
  11. * #include "upb/foobar.h"
  12. * #include "upb/baz.h"
  13. *
  14. * // MUST be last included header.
  15. * #include "upb/port_def.inc"
  16. *
  17. * // Code for this file.
  18. * // <...>
  19. *
  20. * // Can be omitted for .c files, required for .h.
  21. * #include "upb/port_undef.inc"
  22. *
  23. * This file is private and must not be included by users!
  24. */
  25. #if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
  26. (defined(__cplusplus) && __cplusplus >= 201103L) || \
  27. (defined(_MSC_VER) && _MSC_VER >= 1900))
  28. #error upb requires C99 or C++11 or MSVC >= 2015.
  29. #endif
  30. #include <stdint.h>
  31. #include <stddef.h>
  32. #if UINTPTR_MAX == 0xffffffff
  33. #define UPB_SIZE(size32, size64) size32
  34. #else
  35. #define UPB_SIZE(size32, size64) size64
  36. #endif
  37. /* If we always read/write as a consistent type to each address, this shouldn't
  38. * violate aliasing.
  39. */
  40. #define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))
  41. #define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
  42. *UPB_PTR_AT(msg, case_offset, int) == case_val \
  43. ? *UPB_PTR_AT(msg, offset, fieldtype) \
  44. : default
  45. #define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
  46. *UPB_PTR_AT(msg, case_offset, int) = case_val; \
  47. *UPB_PTR_AT(msg, offset, fieldtype) = value;
  48. #define UPB_MAPTYPE_STRING 0
  49. /* UPB_INLINE: inline if possible, emit standalone code if required. */
  50. #ifdef __cplusplus
  51. #define UPB_INLINE inline
  52. #elif defined (__GNUC__) || defined(__clang__)
  53. #define UPB_INLINE static __inline__
  54. #else
  55. #define UPB_INLINE static
  56. #endif
  57. #define UPB_ALIGN_UP(size, align) (((size) + (align) - 1) / (align) * (align))
  58. #define UPB_ALIGN_DOWN(size, align) ((size) / (align) * (align))
  59. #define UPB_ALIGN_MALLOC(size) UPB_ALIGN_UP(size, 16)
  60. #define UPB_ALIGN_OF(type) offsetof (struct { char c; type member; }, member)
  61. /* Hints to the compiler about likely/unlikely branches. */
  62. #if defined (__GNUC__) || defined(__clang__)
  63. #define UPB_LIKELY(x) __builtin_expect((x),1)
  64. #define UPB_UNLIKELY(x) __builtin_expect((x),0)
  65. #else
  66. #define UPB_LIKELY(x) (x)
  67. #define UPB_UNLIKELY(x) (x)
  68. #endif
  69. /* Macros for function attributes on compilers that support them. */
  70. #ifdef __GNUC__
  71. #define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
  72. #define UPB_NOINLINE __attribute__((noinline))
  73. #define UPB_NORETURN __attribute__((__noreturn__))
  74. #define UPB_PRINTF(str, first_vararg) __attribute__((format (printf, str, first_vararg)))
  75. #elif defined(_MSC_VER)
  76. #define UPB_NOINLINE
  77. #define UPB_FORCEINLINE
  78. #define UPB_NORETURN __declspec(noreturn)
  79. #define UPB_PRINTF(str, first_vararg)
  80. #else /* !defined(__GNUC__) */
  81. #define UPB_FORCEINLINE
  82. #define UPB_NOINLINE
  83. #define UPB_NORETURN
  84. #define UPB_PRINTF(str, first_vararg)
  85. #endif
  86. #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
  87. #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
  88. #define UPB_UNUSED(var) (void)var
  89. /* UPB_ASSUME(): in release mode, we tell the compiler to assume this is true.
  90. */
  91. #ifdef NDEBUG
  92. #ifdef __GNUC__
  93. #define UPB_ASSUME(expr) if (!(expr)) __builtin_unreachable()
  94. #elif defined _MSC_VER
  95. #define UPB_ASSUME(expr) if (!(expr)) __assume(0)
  96. #else
  97. #define UPB_ASSUME(expr) do {} while (false && (expr))
  98. #endif
  99. #else
  100. #define UPB_ASSUME(expr) assert(expr)
  101. #endif
  102. /* UPB_ASSERT(): in release mode, we use the expression without letting it be
  103. * evaluated. This prevents "unused variable" warnings. */
  104. #ifdef NDEBUG
  105. #define UPB_ASSERT(expr) do {} while (false && (expr))
  106. #else
  107. #define UPB_ASSERT(expr) assert(expr)
  108. #endif
  109. #if defined(__GNUC__) || defined(__clang__)
  110. #define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
  111. #else
  112. #define UPB_UNREACHABLE() do { assert(0); } while(0)
  113. #endif
  114. /* UPB_SETJMP() / UPB_LONGJMP(): avoid setting/restoring signal mask. */
  115. #ifdef __APPLE__
  116. #define UPB_SETJMP(buf) _setjmp(buf)
  117. #define UPB_LONGJMP(buf, val) _longjmp(buf, val)
  118. #else
  119. #define UPB_SETJMP(buf) setjmp(buf)
  120. #define UPB_LONGJMP(buf, val) longjmp(buf, val)
  121. #endif
  122. /* Configure whether fasttable is switched on or not. *************************/
  123. #if defined(__x86_64__) && defined(__GNUC__)
  124. #define UPB_FASTTABLE_SUPPORTED 1
  125. #else
  126. #define UPB_FASTTABLE_SUPPORTED 0
  127. #endif
  128. /* define UPB_ENABLE_FASTTABLE to force fast table support.
  129. * This is useful when we want to ensure we are really getting fasttable,
  130. * for example for testing or benchmarking. */
  131. #if defined(UPB_ENABLE_FASTTABLE)
  132. #if !UPB_FASTTABLE_SUPPORTED
  133. #error fasttable is x86-64 + Clang/GCC only
  134. #endif
  135. #define UPB_FASTTABLE 1
  136. /* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible.
  137. * This is useful for releasing code that might be used on multiple platforms,
  138. * for example the PHP or Ruby C extensions. */
  139. #elif defined(UPB_TRY_ENABLE_FASTTABLE)
  140. #define UPB_FASTTABLE UPB_FASTTABLE_SUPPORTED
  141. #else
  142. #define UPB_FASTTABLE 0
  143. #endif
  144. /* UPB_FASTTABLE_INIT() allows protos compiled for fasttable to gracefully
  145. * degrade to non-fasttable if we are using UPB_TRY_ENABLE_FASTTABLE. */
  146. #if !UPB_FASTTABLE && defined(UPB_TRY_ENABLE_FASTTABLE)
  147. #define UPB_FASTTABLE_INIT(...)
  148. #else
  149. #define UPB_FASTTABLE_INIT(...) __VA_ARGS__
  150. #endif
  151. #undef UPB_FASTTABLE_SUPPORTED
  152. /* ASAN poisoning (for arena) *************************************************/
  153. #if defined(__SANITIZE_ADDRESS__)
  154. #define UPB_ASAN 1
  155. #ifdef __cplusplus
  156. extern "C" {
  157. #endif
  158. void __asan_poison_memory_region(void const volatile *addr, size_t size);
  159. void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
  160. #ifdef __cplusplus
  161. } /* extern "C" */
  162. #endif
  163. #define UPB_POISON_MEMORY_REGION(addr, size) \
  164. __asan_poison_memory_region((addr), (size))
  165. #define UPB_UNPOISON_MEMORY_REGION(addr, size) \
  166. __asan_unpoison_memory_region((addr), (size))
  167. #else
  168. #define UPB_ASAN 0
  169. #define UPB_POISON_MEMORY_REGION(addr, size) \
  170. ((void)(addr), (void)(size))
  171. #define UPB_UNPOISON_MEMORY_REGION(addr, size) \
  172. ((void)(addr), (void)(size))
  173. #endif
  174. #include <setjmp.h>
  175. #include <string.h>
  176. /* Must be last. */
  177. /* Maps descriptor type -> elem_size_lg2. */
  178. static const uint8_t desctype_to_elem_size_lg2[] = {
  179. -1, /* invalid descriptor type */
  180. 3, /* DOUBLE */
  181. 2, /* FLOAT */
  182. 3, /* INT64 */
  183. 3, /* UINT64 */
  184. 2, /* INT32 */
  185. 3, /* FIXED64 */
  186. 2, /* FIXED32 */
  187. 0, /* BOOL */
  188. UPB_SIZE(3, 4), /* STRING */
  189. UPB_SIZE(2, 3), /* GROUP */
  190. UPB_SIZE(2, 3), /* MESSAGE */
  191. UPB_SIZE(3, 4), /* BYTES */
  192. 2, /* UINT32 */
  193. 2, /* ENUM */
  194. 2, /* SFIXED32 */
  195. 3, /* SFIXED64 */
  196. 2, /* SINT32 */
  197. 3, /* SINT64 */
  198. };
  199. /* Maps descriptor type -> upb map size. */
  200. static const uint8_t desctype_to_mapsize[] = {
  201. -1, /* invalid descriptor type */
  202. 8, /* DOUBLE */
  203. 4, /* FLOAT */
  204. 8, /* INT64 */
  205. 8, /* UINT64 */
  206. 4, /* INT32 */
  207. 8, /* FIXED64 */
  208. 4, /* FIXED32 */
  209. 1, /* BOOL */
  210. UPB_MAPTYPE_STRING, /* STRING */
  211. sizeof(void *), /* GROUP */
  212. sizeof(void *), /* MESSAGE */
  213. UPB_MAPTYPE_STRING, /* BYTES */
  214. 4, /* UINT32 */
  215. 4, /* ENUM */
  216. 4, /* SFIXED32 */
  217. 8, /* SFIXED64 */
  218. 4, /* SINT32 */
  219. 8, /* SINT64 */
  220. };
  221. static const unsigned fixed32_ok = (1 << UPB_DTYPE_FLOAT) |
  222. (1 << UPB_DTYPE_FIXED32) |
  223. (1 << UPB_DTYPE_SFIXED32);
  224. static const unsigned fixed64_ok = (1 << UPB_DTYPE_DOUBLE) |
  225. (1 << UPB_DTYPE_FIXED64) |
  226. (1 << UPB_DTYPE_SFIXED64);
  227. /* Op: an action to be performed for a wire-type/field-type combination. */
  228. #define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */
  229. #define OP_STRING 4
  230. #define OP_BYTES 5
  231. #define OP_SUBMSG 6
  232. /* Ops above are scalar-only. Repeated fields can use any op. */
  233. #define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */
  234. #define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
  235. static const int8_t varint_ops[19] = {
  236. -1, /* field not found */
  237. -1, /* DOUBLE */
  238. -1, /* FLOAT */
  239. OP_SCALAR_LG2(3), /* INT64 */
  240. OP_SCALAR_LG2(3), /* UINT64 */
  241. OP_SCALAR_LG2(2), /* INT32 */
  242. -1, /* FIXED64 */
  243. -1, /* FIXED32 */
  244. OP_SCALAR_LG2(0), /* BOOL */
  245. -1, /* STRING */
  246. -1, /* GROUP */
  247. -1, /* MESSAGE */
  248. -1, /* BYTES */
  249. OP_SCALAR_LG2(2), /* UINT32 */
  250. OP_SCALAR_LG2(2), /* ENUM */
  251. -1, /* SFIXED32 */
  252. -1, /* SFIXED64 */
  253. OP_SCALAR_LG2(2), /* SINT32 */
  254. OP_SCALAR_LG2(3), /* SINT64 */
  255. };
  256. static const int8_t delim_ops[37] = {
  257. /* For non-repeated field type. */
  258. -1, /* field not found */
  259. -1, /* DOUBLE */
  260. -1, /* FLOAT */
  261. -1, /* INT64 */
  262. -1, /* UINT64 */
  263. -1, /* INT32 */
  264. -1, /* FIXED64 */
  265. -1, /* FIXED32 */
  266. -1, /* BOOL */
  267. OP_STRING, /* STRING */
  268. -1, /* GROUP */
  269. OP_SUBMSG, /* MESSAGE */
  270. OP_BYTES, /* BYTES */
  271. -1, /* UINT32 */
  272. -1, /* ENUM */
  273. -1, /* SFIXED32 */
  274. -1, /* SFIXED64 */
  275. -1, /* SINT32 */
  276. -1, /* SINT64 */
  277. /* For repeated field type. */
  278. OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */
  279. OP_FIXPCK_LG2(2), /* REPEATED FLOAT */
  280. OP_VARPCK_LG2(3), /* REPEATED INT64 */
  281. OP_VARPCK_LG2(3), /* REPEATED UINT64 */
  282. OP_VARPCK_LG2(2), /* REPEATED INT32 */
  283. OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */
  284. OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */
  285. OP_VARPCK_LG2(0), /* REPEATED BOOL */
  286. OP_STRING, /* REPEATED STRING */
  287. OP_SUBMSG, /* REPEATED GROUP */
  288. OP_SUBMSG, /* REPEATED MESSAGE */
  289. OP_BYTES, /* REPEATED BYTES */
  290. OP_VARPCK_LG2(2), /* REPEATED UINT32 */
  291. OP_VARPCK_LG2(2), /* REPEATED ENUM */
  292. OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
  293. OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
  294. OP_VARPCK_LG2(2), /* REPEATED SINT32 */
  295. OP_VARPCK_LG2(3), /* REPEATED SINT64 */
  296. };
  297. typedef union {
  298. bool bool_val;
  299. uint32_t uint32_val;
  300. uint64_t uint64_val;
  301. uint32_t size;
  302. } wireval;
  303. static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
  304. const upb_msglayout *layout);
  305. UPB_NORETURN static void decode_err(upb_decstate *d) { UPB_LONGJMP(d->err, 1); }
  306. // We don't want to mark this NORETURN, see comment in .h.
  307. // Unfortunately this code to suppress the warning doesn't appear to be working.
  308. #ifdef __clang__
  309. #pragma clang diagnostic push
  310. #pragma clang diagnostic ignored "-Wunknown-warning-option"
  311. #pragma clang diagnostic ignored "-Wsuggest-attribute"
  312. #endif
  313. const char *fastdecode_err(upb_decstate *d) {
  314. longjmp(d->err, 1);
  315. return NULL;
  316. }
  317. #ifdef __clang__
  318. #pragma clang diagnostic pop
  319. #endif
  320. const uint8_t upb_utf8_offsets[] = {
  321. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  322. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  323. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  324. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  325. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  326. 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  327. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  328. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  329. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  330. 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  331. 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0,
  332. };
  333. static void decode_verifyutf8(upb_decstate *d, const char *buf, int len) {
  334. if (!decode_verifyutf8_inl(buf, len)) decode_err(d);
  335. }
  336. static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) {
  337. bool need_realloc = arr->size - arr->len < elem;
  338. if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, &d->arena)) {
  339. decode_err(d);
  340. }
  341. return need_realloc;
  342. }
  343. typedef struct {
  344. const char *ptr;
  345. uint64_t val;
  346. } decode_vret;
  347. UPB_NOINLINE
  348. static decode_vret decode_longvarint64(const char *ptr, uint64_t val) {
  349. decode_vret ret = {NULL, 0};
  350. uint64_t byte;
  351. int i;
  352. for (i = 1; i < 10; i++) {
  353. byte = (uint8_t)ptr[i];
  354. val += (byte - 1) << (i * 7);
  355. if (!(byte & 0x80)) {
  356. ret.ptr = ptr + i + 1;
  357. ret.val = val;
  358. return ret;
  359. }
  360. }
  361. return ret;
  362. }
  363. UPB_FORCEINLINE
  364. static const char *decode_varint64(upb_decstate *d, const char *ptr,
  365. uint64_t *val) {
  366. uint64_t byte = (uint8_t)*ptr;
  367. if (UPB_LIKELY((byte & 0x80) == 0)) {
  368. *val = byte;
  369. return ptr + 1;
  370. } else {
  371. decode_vret res = decode_longvarint64(ptr, byte);
  372. if (!res.ptr) decode_err(d);
  373. *val = res.val;
  374. return res.ptr;
  375. }
  376. }
  377. UPB_FORCEINLINE
  378. static const char *decode_tag(upb_decstate *d, const char *ptr,
  379. uint32_t *val) {
  380. uint64_t byte = (uint8_t)*ptr;
  381. if (UPB_LIKELY((byte & 0x80) == 0)) {
  382. *val = byte;
  383. return ptr + 1;
  384. } else {
  385. const char *start = ptr;
  386. decode_vret res = decode_longvarint64(ptr, byte);
  387. ptr = res.ptr;
  388. *val = res.val;
  389. if (!ptr || *val > UINT32_MAX || ptr - start > 5) decode_err(d);
  390. return ptr;
  391. }
  392. }
  393. static void decode_munge(int type, wireval *val) {
  394. switch (type) {
  395. case UPB_DESCRIPTOR_TYPE_BOOL:
  396. val->bool_val = val->uint64_val != 0;
  397. break;
  398. case UPB_DESCRIPTOR_TYPE_SINT32: {
  399. uint32_t n = val->uint32_val;
  400. val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1);
  401. break;
  402. }
  403. case UPB_DESCRIPTOR_TYPE_SINT64: {
  404. uint64_t n = val->uint64_val;
  405. val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1);
  406. break;
  407. }
  408. case UPB_DESCRIPTOR_TYPE_INT32:
  409. case UPB_DESCRIPTOR_TYPE_UINT32:
  410. if (!_upb_isle()) {
  411. /* The next stage will memcpy(dst, &val, 4) */
  412. val->uint32_val = val->uint64_val;
  413. }
  414. break;
  415. }
  416. }
  417. static const upb_msglayout_field *upb_find_field(const upb_msglayout *l,
  418. uint32_t field_number) {
  419. static upb_msglayout_field none = {0, 0, 0, 0, 0, 0};
  420. /* Lots of optimization opportunities here. */
  421. int i;
  422. if (l == NULL) return &none;
  423. for (i = 0; i < l->field_count; i++) {
  424. if (l->fields[i].number == field_number) {
  425. return &l->fields[i];
  426. }
  427. }
  428. return &none; /* Unknown field. */
  429. }
  430. static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout *layout,
  431. const upb_msglayout_field *field) {
  432. const upb_msglayout *subl = layout->submsgs[field->submsg_index];
  433. return _upb_msg_new_inl(subl, &d->arena);
  434. }
  435. UPB_NOINLINE
  436. const char *decode_isdonefallback(upb_decstate *d, const char *ptr,
  437. int overrun) {
  438. ptr = decode_isdonefallback_inl(d, ptr, overrun);
  439. if (ptr == NULL) {
  440. decode_err(d);
  441. }
  442. return ptr;
  443. }
  444. static const char *decode_readstr(upb_decstate *d, const char *ptr, int size,
  445. upb_strview *str) {
  446. if (d->alias) {
  447. str->data = ptr;
  448. } else {
  449. char *data = upb_arena_malloc(&d->arena, size);
  450. if (!data) decode_err(d);
  451. memcpy(data, ptr, size);
  452. str->data = data;
  453. }
  454. str->size = size;
  455. return ptr + size;
  456. }
  457. UPB_FORCEINLINE
  458. static const char *decode_tosubmsg(upb_decstate *d, const char *ptr,
  459. upb_msg *submsg, const upb_msglayout *layout,
  460. const upb_msglayout_field *field, int size) {
  461. const upb_msglayout *subl = layout->submsgs[field->submsg_index];
  462. int saved_delta = decode_pushlimit(d, ptr, size);
  463. if (--d->depth < 0) decode_err(d);
  464. if (!decode_isdone(d, &ptr)) {
  465. ptr = decode_msg(d, ptr, submsg, subl);
  466. }
  467. if (d->end_group != DECODE_NOGROUP) decode_err(d);
  468. decode_poplimit(d, ptr, saved_delta);
  469. d->depth++;
  470. return ptr;
  471. }
  472. UPB_FORCEINLINE
  473. static const char *decode_group(upb_decstate *d, const char *ptr,
  474. upb_msg *submsg, const upb_msglayout *subl,
  475. uint32_t number) {
  476. if (--d->depth < 0) decode_err(d);
  477. if (decode_isdone(d, &ptr)) {
  478. decode_err(d);
  479. }
  480. ptr = decode_msg(d, ptr, submsg, subl);
  481. if (d->end_group != number) decode_err(d);
  482. d->end_group = DECODE_NOGROUP;
  483. d->depth++;
  484. return ptr;
  485. }
  486. UPB_FORCEINLINE
  487. static const char *decode_togroup(upb_decstate *d, const char *ptr,
  488. upb_msg *submsg, const upb_msglayout *layout,
  489. const upb_msglayout_field *field) {
  490. const upb_msglayout *subl = layout->submsgs[field->submsg_index];
  491. return decode_group(d, ptr, submsg, subl, field->number);
  492. }
  493. static const char *decode_toarray(upb_decstate *d, const char *ptr,
  494. upb_msg *msg, const upb_msglayout *layout,
  495. const upb_msglayout_field *field, wireval val,
  496. int op) {
  497. upb_array **arrp = UPB_PTR_AT(msg, field->offset, void);
  498. upb_array *arr = *arrp;
  499. void *mem;
  500. if (arr) {
  501. decode_reserve(d, arr, 1);
  502. } else {
  503. size_t lg2 = desctype_to_elem_size_lg2[field->descriptortype];
  504. arr = _upb_array_new(&d->arena, 4, lg2);
  505. if (!arr) decode_err(d);
  506. *arrp = arr;
  507. }
  508. switch (op) {
  509. case OP_SCALAR_LG2(0):
  510. case OP_SCALAR_LG2(2):
  511. case OP_SCALAR_LG2(3):
  512. /* Append scalar value. */
  513. mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void);
  514. arr->len++;
  515. memcpy(mem, &val, 1 << op);
  516. return ptr;
  517. case OP_STRING:
  518. decode_verifyutf8(d, ptr, val.size);
  519. /* Fallthrough. */
  520. case OP_BYTES: {
  521. /* Append bytes. */
  522. upb_strview *str = (upb_strview*)_upb_array_ptr(arr) + arr->len;
  523. arr->len++;
  524. return decode_readstr(d, ptr, val.size, str);
  525. }
  526. case OP_SUBMSG: {
  527. /* Append submessage / group. */
  528. upb_msg *submsg = decode_newsubmsg(d, layout, field);
  529. *UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void *), upb_msg *) =
  530. submsg;
  531. arr->len++;
  532. if (UPB_UNLIKELY(field->descriptortype == UPB_DTYPE_GROUP)) {
  533. return decode_togroup(d, ptr, submsg, layout, field);
  534. } else {
  535. return decode_tosubmsg(d, ptr, submsg, layout, field, val.size);
  536. }
  537. }
  538. case OP_FIXPCK_LG2(2):
  539. case OP_FIXPCK_LG2(3): {
  540. /* Fixed packed. */
  541. int lg2 = op - OP_FIXPCK_LG2(0);
  542. int mask = (1 << lg2) - 1;
  543. size_t count = val.size >> lg2;
  544. if ((val.size & mask) != 0) {
  545. decode_err(d); /* Length isn't a round multiple of elem size. */
  546. }
  547. decode_reserve(d, arr, count);
  548. mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
  549. arr->len += count;
  550. memcpy(mem, ptr, val.size); /* XXX: ptr boundary. */
  551. return ptr + val.size;
  552. }
  553. case OP_VARPCK_LG2(0):
  554. case OP_VARPCK_LG2(2):
  555. case OP_VARPCK_LG2(3): {
  556. /* Varint packed. */
  557. int lg2 = op - OP_VARPCK_LG2(0);
  558. int scale = 1 << lg2;
  559. int saved_limit = decode_pushlimit(d, ptr, val.size);
  560. char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
  561. while (!decode_isdone(d, &ptr)) {
  562. wireval elem;
  563. ptr = decode_varint64(d, ptr, &elem.uint64_val);
  564. decode_munge(field->descriptortype, &elem);
  565. if (decode_reserve(d, arr, 1)) {
  566. out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
  567. }
  568. arr->len++;
  569. memcpy(out, &elem, scale);
  570. out += scale;
  571. }
  572. decode_poplimit(d, ptr, saved_limit);
  573. return ptr;
  574. }
  575. default:
  576. UPB_UNREACHABLE();
  577. }
  578. }
  579. static const char *decode_tomap(upb_decstate *d, const char *ptr, upb_msg *msg,
  580. const upb_msglayout *layout,
  581. const upb_msglayout_field *field, wireval val) {
  582. upb_map **map_p = UPB_PTR_AT(msg, field->offset, upb_map *);
  583. upb_map *map = *map_p;
  584. upb_map_entry ent;
  585. const upb_msglayout *entry = layout->submsgs[field->submsg_index];
  586. if (!map) {
  587. /* Lazily create map. */
  588. const upb_msglayout *entry = layout->submsgs[field->submsg_index];
  589. const upb_msglayout_field *key_field = &entry->fields[0];
  590. const upb_msglayout_field *val_field = &entry->fields[1];
  591. char key_size = desctype_to_mapsize[key_field->descriptortype];
  592. char val_size = desctype_to_mapsize[val_field->descriptortype];
  593. UPB_ASSERT(key_field->offset == 0);
  594. UPB_ASSERT(val_field->offset == sizeof(upb_strview));
  595. map = _upb_map_new(&d->arena, key_size, val_size);
  596. *map_p = map;
  597. }
  598. /* Parse map entry. */
  599. memset(&ent, 0, sizeof(ent));
  600. if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
  601. entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) {
  602. /* Create proactively to handle the case where it doesn't appear. */
  603. ent.v.val = upb_value_ptr(_upb_msg_new(entry->submsgs[0], &d->arena));
  604. }
  605. ptr = decode_tosubmsg(d, ptr, &ent.k, layout, field, val.size);
  606. _upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena);
  607. return ptr;
  608. }
  609. static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
  610. const upb_msglayout *layout,
  611. const upb_msglayout_field *field, wireval val,
  612. int op) {
  613. void *mem = UPB_PTR_AT(msg, field->offset, void);
  614. int type = field->descriptortype;
  615. /* Set presence if necessary. */
  616. if (field->presence < 0) {
  617. /* Oneof case */
  618. uint32_t *oneof_case = _upb_oneofcase_field(msg, field);
  619. if (op == OP_SUBMSG && *oneof_case != field->number) {
  620. memset(mem, 0, sizeof(void*));
  621. }
  622. *oneof_case = field->number;
  623. } else if (field->presence > 0) {
  624. _upb_sethas_field(msg, field);
  625. }
  626. /* Store into message. */
  627. switch (op) {
  628. case OP_SUBMSG: {
  629. upb_msg **submsgp = mem;
  630. upb_msg *submsg = *submsgp;
  631. if (!submsg) {
  632. submsg = decode_newsubmsg(d, layout, field);
  633. *submsgp = submsg;
  634. }
  635. if (UPB_UNLIKELY(type == UPB_DTYPE_GROUP)) {
  636. ptr = decode_togroup(d, ptr, submsg, layout, field);
  637. } else {
  638. ptr = decode_tosubmsg(d, ptr, submsg, layout, field, val.size);
  639. }
  640. break;
  641. }
  642. case OP_STRING:
  643. decode_verifyutf8(d, ptr, val.size);
  644. /* Fallthrough. */
  645. case OP_BYTES:
  646. return decode_readstr(d, ptr, val.size, mem);
  647. case OP_SCALAR_LG2(3):
  648. memcpy(mem, &val, 8);
  649. break;
  650. case OP_SCALAR_LG2(2):
  651. memcpy(mem, &val, 4);
  652. break;
  653. case OP_SCALAR_LG2(0):
  654. memcpy(mem, &val, 1);
  655. break;
  656. default:
  657. UPB_UNREACHABLE();
  658. }
  659. return ptr;
  660. }
  661. UPB_FORCEINLINE
  662. static bool decode_tryfastdispatch(upb_decstate *d, const char **ptr,
  663. upb_msg *msg, const upb_msglayout *layout) {
  664. #if UPB_FASTTABLE
  665. if (layout && layout->table_mask != (unsigned char)-1) {
  666. uint16_t tag = fastdecode_loadtag(*ptr);
  667. intptr_t table = decode_totable(layout);
  668. *ptr = fastdecode_tagdispatch(d, *ptr, msg, table, 0, tag);
  669. return true;
  670. }
  671. #endif
  672. return false;
  673. }
  674. UPB_NOINLINE
  675. static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
  676. const upb_msglayout *layout) {
  677. while (true) {
  678. uint32_t tag;
  679. const upb_msglayout_field *field;
  680. int field_number;
  681. int wire_type;
  682. const char *field_start = ptr;
  683. wireval val;
  684. int op;
  685. UPB_ASSERT(ptr < d->limit_ptr);
  686. ptr = decode_tag(d, ptr, &tag);
  687. field_number = tag >> 3;
  688. wire_type = tag & 7;
  689. field = upb_find_field(layout, field_number);
  690. switch (wire_type) {
  691. case UPB_WIRE_TYPE_VARINT:
  692. ptr = decode_varint64(d, ptr, &val.uint64_val);
  693. op = varint_ops[field->descriptortype];
  694. decode_munge(field->descriptortype, &val);
  695. break;
  696. case UPB_WIRE_TYPE_32BIT:
  697. memcpy(&val.uint32_val, ptr, 4);
  698. val.uint32_val = _upb_be_swap32(val.uint32_val);
  699. ptr += 4;
  700. op = OP_SCALAR_LG2(2);
  701. if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown;
  702. break;
  703. case UPB_WIRE_TYPE_64BIT:
  704. memcpy(&val.uint64_val, ptr, 8);
  705. val.uint64_val = _upb_be_swap64(val.uint64_val);
  706. ptr += 8;
  707. op = OP_SCALAR_LG2(3);
  708. if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown;
  709. break;
  710. case UPB_WIRE_TYPE_DELIMITED: {
  711. int ndx = field->descriptortype;
  712. uint64_t size;
  713. if (_upb_isrepeated(field)) ndx += 18;
  714. ptr = decode_varint64(d, ptr, &size);
  715. if (size >= INT32_MAX ||
  716. ptr - d->end + (int32_t)size > d->limit) {
  717. decode_err(d); /* Length overflow. */
  718. }
  719. op = delim_ops[ndx];
  720. val.size = size;
  721. break;
  722. }
  723. case UPB_WIRE_TYPE_START_GROUP:
  724. val.uint32_val = field_number;
  725. op = OP_SUBMSG;
  726. if (field->descriptortype != UPB_DTYPE_GROUP) goto unknown;
  727. break;
  728. case UPB_WIRE_TYPE_END_GROUP:
  729. d->end_group = field_number;
  730. return ptr;
  731. default:
  732. decode_err(d);
  733. }
  734. if (op >= 0) {
  735. /* Parse, using op for dispatch. */
  736. switch (field->label) {
  737. case UPB_LABEL_REPEATED:
  738. case _UPB_LABEL_PACKED:
  739. ptr = decode_toarray(d, ptr, msg, layout, field, val, op);
  740. break;
  741. case _UPB_LABEL_MAP:
  742. ptr = decode_tomap(d, ptr, msg, layout, field, val);
  743. break;
  744. default:
  745. ptr = decode_tomsg(d, ptr, msg, layout, field, val, op);
  746. break;
  747. }
  748. } else {
  749. unknown:
  750. /* Skip unknown field. */
  751. if (field_number == 0) decode_err(d);
  752. if (wire_type == UPB_WIRE_TYPE_DELIMITED) ptr += val.size;
  753. if (msg) {
  754. if (wire_type == UPB_WIRE_TYPE_START_GROUP) {
  755. d->unknown = field_start;
  756. d->unknown_msg = msg;
  757. ptr = decode_group(d, ptr, NULL, NULL, field_number);
  758. d->unknown_msg = NULL;
  759. field_start = d->unknown;
  760. }
  761. if (!_upb_msg_addunknown(msg, field_start, ptr - field_start,
  762. &d->arena)) {
  763. decode_err(d);
  764. }
  765. } else if (wire_type == UPB_WIRE_TYPE_START_GROUP) {
  766. ptr = decode_group(d, ptr, NULL, NULL, field_number);
  767. }
  768. }
  769. if (decode_isdone(d, &ptr)) return ptr;
  770. if (decode_tryfastdispatch(d, &ptr, msg, layout)) return ptr;
  771. }
  772. }
  773. const char *fastdecode_generic(struct upb_decstate *d, const char *ptr,
  774. upb_msg *msg, intptr_t table, uint64_t hasbits,
  775. uint64_t data) {
  776. (void)data;
  777. *(uint32_t*)msg |= hasbits;
  778. return decode_msg(d, ptr, msg, decode_totablep(table));
  779. }
  780. static bool decode_top(struct upb_decstate *d, const char *buf, void *msg,
  781. const upb_msglayout *l) {
  782. if (!decode_tryfastdispatch(d, &buf, msg, l)) {
  783. decode_msg(d, buf, msg, l);
  784. }
  785. return d->end_group == DECODE_NOGROUP;
  786. }
  787. bool _upb_decode(const char *buf, size_t size, void *msg,
  788. const upb_msglayout *l, upb_arena *arena, int options) {
  789. bool ok;
  790. upb_decstate state;
  791. unsigned depth = (unsigned)options >> 16;
  792. if (size == 0) {
  793. return true;
  794. } else if (size <= 16) {
  795. memset(&state.patch, 0, 32);
  796. memcpy(&state.patch, buf, size);
  797. buf = state.patch;
  798. state.end = buf + size;
  799. state.limit = 0;
  800. state.alias = false;
  801. } else {
  802. state.end = buf + size - 16;
  803. state.limit = 16;
  804. state.alias = options & UPB_DECODE_ALIAS;
  805. }
  806. state.limit_ptr = state.end;
  807. state.unknown_msg = NULL;
  808. state.depth = depth ? depth : 64;
  809. state.end_group = DECODE_NOGROUP;
  810. state.arena.head = arena->head;
  811. state.arena.last_size = arena->last_size;
  812. state.arena.cleanups = arena->cleanups;
  813. state.arena.parent = arena;
  814. if (UPB_UNLIKELY(UPB_SETJMP(state.err))) {
  815. ok = false;
  816. } else {
  817. ok = decode_top(&state, buf, msg, l);
  818. }
  819. arena->head.ptr = state.arena.head.ptr;
  820. arena->head.end = state.arena.head.end;
  821. arena->cleanups = state.arena.cleanups;
  822. return ok;
  823. }
  824. #undef OP_SCALAR_LG2
  825. #undef OP_FIXPCK_LG2
  826. #undef OP_VARPCK_LG2
  827. #undef OP_STRING
  828. #undef OP_SUBMSG
  829. /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
  830. #include <setjmp.h>
  831. #include <string.h>
  832. /* Must be last. */
  833. #define UPB_PB_VARINT_MAX_LEN 10
  834. UPB_NOINLINE
  835. static size_t encode_varint64(uint64_t val, char *buf) {
  836. size_t i = 0;
  837. do {
  838. uint8_t byte = val & 0x7fU;
  839. val >>= 7;
  840. if (val) byte |= 0x80U;
  841. buf[i++] = byte;
  842. } while (val);
  843. return i;
  844. }
  845. static uint32_t encode_zz32(int32_t n) { return ((uint32_t)n << 1) ^ (n >> 31); }
  846. static uint64_t encode_zz64(int64_t n) { return ((uint64_t)n << 1) ^ (n >> 63); }
  847. typedef struct {
  848. jmp_buf err;
  849. upb_alloc *alloc;
  850. char *buf, *ptr, *limit;
  851. int options;
  852. int depth;
  853. _upb_mapsorter sorter;
  854. } upb_encstate;
  855. static size_t upb_roundup_pow2(size_t bytes) {
  856. size_t ret = 128;
  857. while (ret < bytes) {
  858. ret *= 2;
  859. }
  860. return ret;
  861. }
  862. UPB_NORETURN static void encode_err(upb_encstate *e) {
  863. UPB_LONGJMP(e->err, 1);
  864. }
  865. UPB_NOINLINE
  866. static void encode_growbuffer(upb_encstate *e, size_t bytes) {
  867. size_t old_size = e->limit - e->buf;
  868. size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
  869. char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
  870. if (!new_buf) encode_err(e);
  871. /* We want previous data at the end, realloc() put it at the beginning. */
  872. if (old_size > 0) {
  873. memmove(new_buf + new_size - old_size, e->buf, old_size);
  874. }
  875. e->ptr = new_buf + new_size - (e->limit - e->ptr);
  876. e->limit = new_buf + new_size;
  877. e->buf = new_buf;
  878. e->ptr -= bytes;
  879. }
  880. /* Call to ensure that at least "bytes" bytes are available for writing at
  881. * e->ptr. Returns false if the bytes could not be allocated. */
  882. UPB_FORCEINLINE
  883. static void encode_reserve(upb_encstate *e, size_t bytes) {
  884. if ((size_t)(e->ptr - e->buf) < bytes) {
  885. encode_growbuffer(e, bytes);
  886. return;
  887. }
  888. e->ptr -= bytes;
  889. }
  890. /* Writes the given bytes to the buffer, handling reserve/advance. */
  891. static void encode_bytes(upb_encstate *e, const void *data, size_t len) {
  892. if (len == 0) return; /* memcpy() with zero size is UB */
  893. encode_reserve(e, len);
  894. memcpy(e->ptr, data, len);
  895. }
  896. static void encode_fixed64(upb_encstate *e, uint64_t val) {
  897. val = _upb_be_swap64(val);
  898. encode_bytes(e, &val, sizeof(uint64_t));
  899. }
  900. static void encode_fixed32(upb_encstate *e, uint32_t val) {
  901. val = _upb_be_swap32(val);
  902. encode_bytes(e, &val, sizeof(uint32_t));
  903. }
  904. UPB_NOINLINE
  905. static void encode_longvarint(upb_encstate *e, uint64_t val) {
  906. size_t len;
  907. char *start;
  908. encode_reserve(e, UPB_PB_VARINT_MAX_LEN);
  909. len = encode_varint64(val, e->ptr);
  910. start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
  911. memmove(start, e->ptr, len);
  912. e->ptr = start;
  913. }
  914. UPB_FORCEINLINE
  915. static void encode_varint(upb_encstate *e, uint64_t val) {
  916. if (val < 128 && e->ptr != e->buf) {
  917. --e->ptr;
  918. *e->ptr = val;
  919. } else {
  920. encode_longvarint(e, val);
  921. }
  922. }
  923. static void encode_double(upb_encstate *e, double d) {
  924. uint64_t u64;
  925. UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
  926. memcpy(&u64, &d, sizeof(uint64_t));
  927. encode_fixed64(e, u64);
  928. }
  929. static void encode_float(upb_encstate *e, float d) {
  930. uint32_t u32;
  931. UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
  932. memcpy(&u32, &d, sizeof(uint32_t));
  933. encode_fixed32(e, u32);
  934. }
  935. static void encode_tag(upb_encstate *e, uint32_t field_number,
  936. uint8_t wire_type) {
  937. encode_varint(e, (field_number << 3) | wire_type);
  938. }
  939. static void encode_fixedarray(upb_encstate *e, const upb_array *arr,
  940. size_t elem_size, uint32_t tag) {
  941. size_t bytes = arr->len * elem_size;
  942. const char* data = _upb_array_constptr(arr);
  943. const char* ptr = data + bytes - elem_size;
  944. if (tag) {
  945. while (true) {
  946. encode_bytes(e, ptr, elem_size);
  947. encode_varint(e, tag);
  948. if (ptr == data) break;
  949. ptr -= elem_size;
  950. }
  951. } else {
  952. encode_bytes(e, data, bytes);
  953. }
  954. }
  955. static void encode_message(upb_encstate *e, const char *msg,
  956. const upb_msglayout *m, size_t *size);
  957. static void encode_scalar(upb_encstate *e, const void *_field_mem,
  958. const upb_msglayout *m, const upb_msglayout_field *f,
  959. bool skip_zero_value) {
  960. const char *field_mem = _field_mem;
  961. int wire_type;
  962. #define CASE(ctype, type, wtype, encodeval) \
  963. { \
  964. ctype val = *(ctype *)field_mem; \
  965. if (skip_zero_value && val == 0) { \
  966. return; \
  967. } \
  968. encode_##type(e, encodeval); \
  969. wire_type = wtype; \
  970. break; \
  971. }
  972. switch (f->descriptortype) {
  973. case UPB_DESCRIPTOR_TYPE_DOUBLE:
  974. CASE(double, double, UPB_WIRE_TYPE_64BIT, val);
  975. case UPB_DESCRIPTOR_TYPE_FLOAT:
  976. CASE(float, float, UPB_WIRE_TYPE_32BIT, val);
  977. case UPB_DESCRIPTOR_TYPE_INT64:
  978. case UPB_DESCRIPTOR_TYPE_UINT64:
  979. CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val);
  980. case UPB_DESCRIPTOR_TYPE_UINT32:
  981. CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val);
  982. case UPB_DESCRIPTOR_TYPE_INT32:
  983. case UPB_DESCRIPTOR_TYPE_ENUM:
  984. CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val);
  985. case UPB_DESCRIPTOR_TYPE_SFIXED64:
  986. case UPB_DESCRIPTOR_TYPE_FIXED64:
  987. CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val);
  988. case UPB_DESCRIPTOR_TYPE_FIXED32:
  989. case UPB_DESCRIPTOR_TYPE_SFIXED32:
  990. CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val);
  991. case UPB_DESCRIPTOR_TYPE_BOOL:
  992. CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val);
  993. case UPB_DESCRIPTOR_TYPE_SINT32:
  994. CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, encode_zz32(val));
  995. case UPB_DESCRIPTOR_TYPE_SINT64:
  996. CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, encode_zz64(val));
  997. case UPB_DESCRIPTOR_TYPE_STRING:
  998. case UPB_DESCRIPTOR_TYPE_BYTES: {
  999. upb_strview view = *(upb_strview*)field_mem;
  1000. if (skip_zero_value && view.size == 0) {
  1001. return;
  1002. }
  1003. encode_bytes(e, view.data, view.size);
  1004. encode_varint(e, view.size);
  1005. wire_type = UPB_WIRE_TYPE_DELIMITED;
  1006. break;
  1007. }
  1008. case UPB_DESCRIPTOR_TYPE_GROUP: {
  1009. size_t size;
  1010. void *submsg = *(void **)field_mem;
  1011. const upb_msglayout *subm = m->submsgs[f->submsg_index];
  1012. if (submsg == NULL) {
  1013. return;
  1014. }
  1015. if (--e->depth == 0) encode_err(e);
  1016. encode_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP);
  1017. encode_message(e, submsg, subm, &size);
  1018. wire_type = UPB_WIRE_TYPE_START_GROUP;
  1019. e->depth++;
  1020. break;
  1021. }
  1022. case UPB_DESCRIPTOR_TYPE_MESSAGE: {
  1023. size_t size;
  1024. void *submsg = *(void **)field_mem;
  1025. const upb_msglayout *subm = m->submsgs[f->submsg_index];
  1026. if (submsg == NULL) {
  1027. return;
  1028. }
  1029. if (--e->depth == 0) encode_err(e);
  1030. encode_message(e, submsg, subm, &size);
  1031. encode_varint(e, size);
  1032. wire_type = UPB_WIRE_TYPE_DELIMITED;
  1033. e->depth++;
  1034. break;
  1035. }
  1036. default:
  1037. UPB_UNREACHABLE();
  1038. }
  1039. #undef CASE
  1040. encode_tag(e, f->number, wire_type);
  1041. }
  1042. static void encode_array(upb_encstate *e, const char *field_mem,
  1043. const upb_msglayout *m, const upb_msglayout_field *f) {
  1044. const upb_array *arr = *(const upb_array**)field_mem;
  1045. bool packed = f->label == _UPB_LABEL_PACKED;
  1046. size_t pre_len = e->limit - e->ptr;
  1047. if (arr == NULL || arr->len == 0) {
  1048. return;
  1049. }
  1050. #define VARINT_CASE(ctype, encode) \
  1051. { \
  1052. const ctype *start = _upb_array_constptr(arr); \
  1053. const ctype *ptr = start + arr->len; \
  1054. uint32_t tag = packed ? 0 : (f->number << 3) | UPB_WIRE_TYPE_VARINT; \
  1055. do { \
  1056. ptr--; \
  1057. encode_varint(e, encode); \
  1058. if (tag) encode_varint(e, tag); \
  1059. } while (ptr != start); \
  1060. } \
  1061. break;
  1062. #define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))
  1063. switch (f->descriptortype) {
  1064. case UPB_DESCRIPTOR_TYPE_DOUBLE:
  1065. encode_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT));
  1066. break;
  1067. case UPB_DESCRIPTOR_TYPE_FLOAT:
  1068. encode_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT));
  1069. break;
  1070. case UPB_DESCRIPTOR_TYPE_SFIXED64:
  1071. case UPB_DESCRIPTOR_TYPE_FIXED64:
  1072. encode_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT));
  1073. break;
  1074. case UPB_DESCRIPTOR_TYPE_FIXED32:
  1075. case UPB_DESCRIPTOR_TYPE_SFIXED32:
  1076. encode_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT));
  1077. break;
  1078. case UPB_DESCRIPTOR_TYPE_INT64:
  1079. case UPB_DESCRIPTOR_TYPE_UINT64:
  1080. VARINT_CASE(uint64_t, *ptr);
  1081. case UPB_DESCRIPTOR_TYPE_UINT32:
  1082. VARINT_CASE(uint32_t, *ptr);
  1083. case UPB_DESCRIPTOR_TYPE_INT32:
  1084. case UPB_DESCRIPTOR_TYPE_ENUM:
  1085. VARINT_CASE(int32_t, (int64_t)*ptr);
  1086. case UPB_DESCRIPTOR_TYPE_BOOL:
  1087. VARINT_CASE(bool, *ptr);
  1088. case UPB_DESCRIPTOR_TYPE_SINT32:
  1089. VARINT_CASE(int32_t, encode_zz32(*ptr));
  1090. case UPB_DESCRIPTOR_TYPE_SINT64:
  1091. VARINT_CASE(int64_t, encode_zz64(*ptr));
  1092. case UPB_DESCRIPTOR_TYPE_STRING:
  1093. case UPB_DESCRIPTOR_TYPE_BYTES: {
  1094. const upb_strview *start = _upb_array_constptr(arr);
  1095. const upb_strview *ptr = start + arr->len;
  1096. do {
  1097. ptr--;
  1098. encode_bytes(e, ptr->data, ptr->size);
  1099. encode_varint(e, ptr->size);
  1100. encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
  1101. } while (ptr != start);
  1102. return;
  1103. }
  1104. case UPB_DESCRIPTOR_TYPE_GROUP: {
  1105. const void *const*start = _upb_array_constptr(arr);
  1106. const void *const*ptr = start + arr->len;
  1107. const upb_msglayout *subm = m->submsgs[f->submsg_index];
  1108. if (--e->depth == 0) encode_err(e);
  1109. do {
  1110. size_t size;
  1111. ptr--;
  1112. encode_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP);
  1113. encode_message(e, *ptr, subm, &size);
  1114. encode_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
  1115. } while (ptr != start);
  1116. e->depth++;
  1117. return;
  1118. }
  1119. case UPB_DESCRIPTOR_TYPE_MESSAGE: {
  1120. const void *const*start = _upb_array_constptr(arr);
  1121. const void *const*ptr = start + arr->len;
  1122. const upb_msglayout *subm = m->submsgs[f->submsg_index];
  1123. if (--e->depth == 0) encode_err(e);
  1124. do {
  1125. size_t size;
  1126. ptr--;
  1127. encode_message(e, *ptr, subm, &size);
  1128. encode_varint(e, size);
  1129. encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
  1130. } while (ptr != start);
  1131. e->depth++;
  1132. return;
  1133. }
  1134. }
  1135. #undef VARINT_CASE
  1136. if (packed) {
  1137. encode_varint(e, e->limit - e->ptr - pre_len);
  1138. encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
  1139. }
  1140. }
  1141. static void encode_mapentry(upb_encstate *e, uint32_t number,
  1142. const upb_msglayout *layout,
  1143. const upb_map_entry *ent) {
  1144. const upb_msglayout_field *key_field = &layout->fields[0];
  1145. const upb_msglayout_field *val_field = &layout->fields[1];
  1146. size_t pre_len = e->limit - e->ptr;
  1147. size_t size;
  1148. encode_scalar(e, &ent->v, layout, val_field, false);
  1149. encode_scalar(e, &ent->k, layout, key_field, false);
  1150. size = (e->limit - e->ptr) - pre_len;
  1151. encode_varint(e, size);
  1152. encode_tag(e, number, UPB_WIRE_TYPE_DELIMITED);
  1153. }
  1154. static void encode_map(upb_encstate *e, const char *field_mem,
  1155. const upb_msglayout *m, const upb_msglayout_field *f) {
  1156. const upb_map *map = *(const upb_map**)field_mem;
  1157. const upb_msglayout *layout = m->submsgs[f->submsg_index];
  1158. UPB_ASSERT(layout->field_count == 2);
  1159. if (map == NULL) return;
  1160. if (e->options & UPB_ENCODE_DETERMINISTIC) {
  1161. _upb_sortedmap sorted;
  1162. _upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map,
  1163. &sorted);
  1164. upb_map_entry ent;
  1165. while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
  1166. encode_mapentry(e, f->number, layout, &ent);
  1167. }
  1168. _upb_mapsorter_popmap(&e->sorter, &sorted);
  1169. } else {
  1170. upb_strtable_iter i;
  1171. upb_strtable_begin(&i, &map->table);
  1172. for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
  1173. upb_strview key = upb_strtable_iter_key(&i);
  1174. const upb_value val = upb_strtable_iter_value(&i);
  1175. upb_map_entry ent;
  1176. _upb_map_fromkey(key, &ent.k, map->key_size);
  1177. _upb_map_fromvalue(val, &ent.v, map->val_size);
  1178. encode_mapentry(e, f->number, layout, &ent);
  1179. }
  1180. }
  1181. }
  1182. static void encode_scalarfield(upb_encstate *e, const char *msg,
  1183. const upb_msglayout *m,
  1184. const upb_msglayout_field *f) {
  1185. bool skip_empty = false;
  1186. if (f->presence == 0) {
  1187. /* Proto3 presence. */
  1188. skip_empty = true;
  1189. } else if (f->presence > 0) {
  1190. /* Proto2 presence: hasbit. */
  1191. if (!_upb_hasbit_field(msg, f)) return;
  1192. } else {
  1193. /* Field is in a oneof. */
  1194. if (_upb_getoneofcase_field(msg, f) != f->number) return;
  1195. }
  1196. encode_scalar(e, msg + f->offset, m, f, skip_empty);
  1197. }
  1198. static void encode_message(upb_encstate *e, const char *msg,
  1199. const upb_msglayout *m, size_t *size) {
  1200. size_t pre_len = e->limit - e->ptr;
  1201. const upb_msglayout_field *f = &m->fields[m->field_count];
  1202. const upb_msglayout_field *first = &m->fields[0];
  1203. if ((e->options & UPB_ENCODE_SKIPUNKNOWN) == 0) {
  1204. size_t unknown_size;
  1205. const char *unknown = upb_msg_getunknown(msg, &unknown_size);
  1206. if (unknown) {
  1207. encode_bytes(e, unknown, unknown_size);
  1208. }
  1209. }
  1210. while (f != first) {
  1211. f--;
  1212. if (_upb_isrepeated(f)) {
  1213. encode_array(e, msg + f->offset, m, f);
  1214. } else if (f->label == _UPB_LABEL_MAP) {
  1215. encode_map(e, msg + f->offset, m, f);
  1216. } else {
  1217. encode_scalarfield(e, msg, m, f);
  1218. }
  1219. }
  1220. *size = (e->limit - e->ptr) - pre_len;
  1221. }
  1222. char *upb_encode_ex(const void *msg, const upb_msglayout *l, int options,
  1223. upb_arena *arena, size_t *size) {
  1224. upb_encstate e;
  1225. unsigned depth = (unsigned)options >> 16;
  1226. e.alloc = upb_arena_alloc(arena);
  1227. e.buf = NULL;
  1228. e.limit = NULL;
  1229. e.ptr = NULL;
  1230. e.depth = depth ? depth : 64;
  1231. e.options = options;
  1232. _upb_mapsorter_init(&e.sorter);
  1233. char *ret = NULL;
  1234. if (UPB_SETJMP(e.err)) {
  1235. *size = 0;
  1236. ret = NULL;
  1237. } else {
  1238. encode_message(&e, msg, l, size);
  1239. *size = e.limit - e.ptr;
  1240. if (*size == 0) {
  1241. static char ch;
  1242. ret = &ch;
  1243. } else {
  1244. UPB_ASSERT(e.ptr);
  1245. ret = e.ptr;
  1246. }
  1247. }
  1248. _upb_mapsorter_destroy(&e.sorter);
  1249. return ret;
  1250. }
  1251. /** upb_msg *******************************************************************/
  1252. static const size_t overhead = sizeof(upb_msg_internal);
  1253. static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
  1254. ptrdiff_t size = sizeof(upb_msg_internal);
  1255. return (upb_msg_internal*)((char*)msg - size);
  1256. }
  1257. upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) {
  1258. return _upb_msg_new_inl(l, a);
  1259. }
  1260. void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l) {
  1261. void *mem = UPB_PTR_AT(msg, -sizeof(upb_msg_internal), char);
  1262. memset(mem, 0, upb_msg_sizeof(l));
  1263. }
  1264. bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
  1265. upb_arena *arena) {
  1266. upb_msg_internal *in = upb_msg_getinternal(msg);
  1267. if (!in->unknown) {
  1268. size_t size = 128;
  1269. while (size < len) size *= 2;
  1270. in->unknown = upb_arena_malloc(arena, size + overhead);
  1271. if (!in->unknown) return false;
  1272. in->unknown->size = size;
  1273. in->unknown->len = 0;
  1274. } else if (in->unknown->size - in->unknown->len < len) {
  1275. size_t need = in->unknown->len + len;
  1276. size_t size = in->unknown->size;
  1277. while (size < need) size *= 2;
  1278. in->unknown = upb_arena_realloc(
  1279. arena, in->unknown, in->unknown->size + overhead, size + overhead);
  1280. if (!in->unknown) return false;
  1281. in->unknown->size = size;
  1282. }
  1283. memcpy(UPB_PTR_AT(in->unknown + 1, in->unknown->len, char), data, len);
  1284. in->unknown->len += len;
  1285. return true;
  1286. }
  1287. void _upb_msg_discardunknown_shallow(upb_msg *msg) {
  1288. upb_msg_internal *in = upb_msg_getinternal(msg);
  1289. if (in->unknown) {
  1290. in->unknown->len = 0;
  1291. }
  1292. }
  1293. const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) {
  1294. const upb_msg_internal *in = upb_msg_getinternal_const(msg);
  1295. if (in->unknown) {
  1296. *len = in->unknown->len;
  1297. return (char*)(in->unknown + 1);
  1298. } else {
  1299. *len = 0;
  1300. return NULL;
  1301. }
  1302. }
  1303. /** upb_array *****************************************************************/
  1304. bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena) {
  1305. size_t new_size = UPB_MAX(arr->size, 4);
  1306. int elem_size_lg2 = arr->data & 7;
  1307. size_t old_bytes = arr->size << elem_size_lg2;
  1308. size_t new_bytes;
  1309. void* ptr = _upb_array_ptr(arr);
  1310. /* Log2 ceiling of size. */
  1311. while (new_size < min_size) new_size *= 2;
  1312. new_bytes = new_size << elem_size_lg2;
  1313. ptr = upb_arena_realloc(arena, ptr, old_bytes, new_bytes);
  1314. if (!ptr) {
  1315. return false;
  1316. }
  1317. arr->data = _upb_tag_arrptr(ptr, elem_size_lg2);
  1318. arr->size = new_size;
  1319. return true;
  1320. }
  1321. static upb_array *getorcreate_array(upb_array **arr_ptr, int elem_size_lg2,
  1322. upb_arena *arena) {
  1323. upb_array *arr = *arr_ptr;
  1324. if (!arr) {
  1325. arr = _upb_array_new(arena, 4, elem_size_lg2);
  1326. if (!arr) return NULL;
  1327. *arr_ptr = arr;
  1328. }
  1329. return arr;
  1330. }
  1331. void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size,
  1332. int elem_size_lg2, upb_arena *arena) {
  1333. upb_array *arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
  1334. return arr && _upb_array_resize(arr, size, arena) ? _upb_array_ptr(arr)
  1335. : NULL;
  1336. }
  1337. bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value,
  1338. int elem_size_lg2, upb_arena *arena) {
  1339. upb_array *arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
  1340. if (!arr) return false;
  1341. size_t elems = arr->len;
  1342. if (!_upb_array_resize(arr, elems + 1, arena)) {
  1343. return false;
  1344. }
  1345. char *data = _upb_array_ptr(arr);
  1346. memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2);
  1347. return true;
  1348. }
  1349. /** upb_map *******************************************************************/
  1350. upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) {
  1351. upb_map *map = upb_arena_malloc(a, sizeof(upb_map));
  1352. if (!map) {
  1353. return NULL;
  1354. }
  1355. upb_strtable_init2(&map->table, UPB_CTYPE_INT32, 4, upb_arena_alloc(a));
  1356. map->key_size = key_size;
  1357. map->val_size = value_size;
  1358. return map;
  1359. }
  1360. static void _upb_mapsorter_getkeys(const void *_a, const void *_b, void *a_key,
  1361. void *b_key, size_t size) {
  1362. const upb_tabent *const*a = _a;
  1363. const upb_tabent *const*b = _b;
  1364. upb_strview a_tabkey = upb_tabstrview((*a)->key);
  1365. upb_strview b_tabkey = upb_tabstrview((*b)->key);
  1366. _upb_map_fromkey(a_tabkey, a_key, size);
  1367. _upb_map_fromkey(b_tabkey, b_key, size);
  1368. }
  1369. static int _upb_mapsorter_cmpi64(const void *_a, const void *_b) {
  1370. int64_t a, b;
  1371. _upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
  1372. return a - b;
  1373. }
  1374. static int _upb_mapsorter_cmpu64(const void *_a, const void *_b) {
  1375. uint64_t a, b;
  1376. _upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
  1377. return a - b;
  1378. }
  1379. static int _upb_mapsorter_cmpi32(const void *_a, const void *_b) {
  1380. int32_t a, b;
  1381. _upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
  1382. return a - b;
  1383. }
  1384. static int _upb_mapsorter_cmpu32(const void *_a, const void *_b) {
  1385. uint32_t a, b;
  1386. _upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
  1387. return a - b;
  1388. }
  1389. static int _upb_mapsorter_cmpbool(const void *_a, const void *_b) {
  1390. bool a, b;
  1391. _upb_mapsorter_getkeys(_a, _b, &a, &b, 1);
  1392. return a - b;
  1393. }
  1394. static int _upb_mapsorter_cmpstr(const void *_a, const void *_b) {
  1395. upb_strview a, b;
  1396. _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING);
  1397. size_t common_size = UPB_MIN(a.size, b.size);
  1398. int cmp = memcmp(a.data, b.data, common_size);
  1399. if (cmp) return cmp;
  1400. return a.size - b.size;
  1401. }
  1402. bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type,
  1403. const upb_map *map, _upb_sortedmap *sorted) {
  1404. int map_size = _upb_map_size(map);
  1405. sorted->start = s->size;
  1406. sorted->pos = sorted->start;
  1407. sorted->end = sorted->start + map_size;
  1408. /* Grow s->entries if necessary. */
  1409. if (sorted->end > s->cap) {
  1410. s->cap = _upb_lg2ceilsize(sorted->end);
  1411. s->entries = realloc(s->entries, s->cap * sizeof(*s->entries));
  1412. if (!s->entries) return false;
  1413. }
  1414. s->size = sorted->end;
  1415. /* Copy non-empty entries from the table to s->entries. */
  1416. upb_tabent const**dst = &s->entries[sorted->start];
  1417. const upb_tabent *src = map->table.t.entries;
  1418. const upb_tabent *end = src + upb_table_size(&map->table.t);
  1419. for (; src < end; src++) {
  1420. if (!upb_tabent_isempty(src)) {
  1421. *dst = src;
  1422. dst++;
  1423. }
  1424. }
  1425. UPB_ASSERT(dst == &s->entries[sorted->end]);
  1426. /* Sort entries according to the key type. */
  1427. int (*compar)(const void *, const void *);
  1428. switch (key_type) {
  1429. case UPB_DESCRIPTOR_TYPE_INT64:
  1430. case UPB_DESCRIPTOR_TYPE_SFIXED64:
  1431. case UPB_DESCRIPTOR_TYPE_SINT64:
  1432. compar = _upb_mapsorter_cmpi64;
  1433. break;
  1434. case UPB_DESCRIPTOR_TYPE_UINT64:
  1435. case UPB_DESCRIPTOR_TYPE_FIXED64:
  1436. compar = _upb_mapsorter_cmpu64;
  1437. break;
  1438. case UPB_DESCRIPTOR_TYPE_INT32:
  1439. case UPB_DESCRIPTOR_TYPE_SINT32:
  1440. case UPB_DESCRIPTOR_TYPE_SFIXED32:
  1441. case UPB_DESCRIPTOR_TYPE_ENUM:
  1442. compar = _upb_mapsorter_cmpi32;
  1443. break;
  1444. case UPB_DESCRIPTOR_TYPE_UINT32:
  1445. case UPB_DESCRIPTOR_TYPE_FIXED32:
  1446. compar = _upb_mapsorter_cmpu32;
  1447. break;
  1448. case UPB_DESCRIPTOR_TYPE_BOOL:
  1449. compar = _upb_mapsorter_cmpbool;
  1450. break;
  1451. case UPB_DESCRIPTOR_TYPE_STRING:
  1452. compar = _upb_mapsorter_cmpstr;
  1453. break;
  1454. default:
  1455. UPB_UNREACHABLE();
  1456. }
  1457. qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar);
  1458. return true;
  1459. }
  1460. /*
  1461. ** upb_table Implementation
  1462. **
  1463. ** Implementation is heavily inspired by Lua's ltable.c.
  1464. */
  1465. #include <string.h>
  1466. #include "third_party/wyhash/wyhash.h"
  1467. /* Must be last. */
  1468. #define UPB_MAXARRSIZE 16 /* 64k. */
  1469. /* From Chromium. */
  1470. #define ARRAY_SIZE(x) \
  1471. ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
  1472. static const double MAX_LOAD = 0.85;
  1473. /* The minimum utilization of the array part of a mixed hash/array table. This
  1474. * is a speed/memory-usage tradeoff (though it's not straightforward because of
  1475. * cache effects). The lower this is, the more memory we'll use. */
  1476. static const double MIN_DENSITY = 0.1;
  1477. bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; }
  1478. int log2ceil(uint64_t v) {
  1479. int ret = 0;
  1480. bool pow2 = is_pow2(v);
  1481. while (v >>= 1) ret++;
  1482. ret = pow2 ? ret : ret + 1; /* Ceiling. */
  1483. return UPB_MIN(UPB_MAXARRSIZE, ret);
  1484. }
  1485. char *upb_strdup(const char *s, upb_alloc *a) {
  1486. return upb_strdup2(s, strlen(s), a);
  1487. }
  1488. char *upb_strdup2(const char *s, size_t len, upb_alloc *a) {
  1489. size_t n;
  1490. char *p;
  1491. /* Prevent overflow errors. */
  1492. if (len == SIZE_MAX) return NULL;
  1493. /* Always null-terminate, even if binary data; but don't rely on the input to
  1494. * have a null-terminating byte since it may be a raw binary buffer. */
  1495. n = len + 1;
  1496. p = upb_malloc(a, n);
  1497. if (p) {
  1498. memcpy(p, s, len);
  1499. p[len] = 0;
  1500. }
  1501. return p;
  1502. }
  1503. /* A type to represent the lookup key of either a strtable or an inttable. */
  1504. typedef union {
  1505. uintptr_t num;
  1506. struct {
  1507. const char *str;
  1508. size_t len;
  1509. } str;
  1510. } lookupkey_t;
  1511. static lookupkey_t strkey2(const char *str, size_t len) {
  1512. lookupkey_t k;
  1513. k.str.str = str;
  1514. k.str.len = len;
  1515. return k;
  1516. }
  1517. static lookupkey_t intkey(uintptr_t key) {
  1518. lookupkey_t k;
  1519. k.num = key;
  1520. return k;
  1521. }
  1522. typedef uint32_t hashfunc_t(upb_tabkey key);
  1523. typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
  1524. /* Base table (shared code) ***************************************************/
  1525. /* For when we need to cast away const. */
  1526. static upb_tabent *mutable_entries(upb_table *t) {
  1527. return (upb_tabent*)t->entries;
  1528. }
  1529. static bool isfull(upb_table *t) {
  1530. return t->count == t->max_count;
  1531. }
  1532. static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
  1533. size_t bytes;
  1534. t->count = 0;
  1535. t->size_lg2 = size_lg2;
  1536. t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
  1537. t->max_count = upb_table_size(t) * MAX_LOAD;
  1538. bytes = upb_table_size(t) * sizeof(upb_tabent);
  1539. if (bytes > 0) {
  1540. t->entries = upb_malloc(a, bytes);
  1541. if (!t->entries) return false;
  1542. memset(mutable_entries(t), 0, bytes);
  1543. } else {
  1544. t->entries = NULL;
  1545. }
  1546. return true;
  1547. }
  1548. static void uninit(upb_table *t, upb_alloc *a) {
  1549. upb_free(a, mutable_entries(t));
  1550. }
  1551. static upb_tabent *emptyent(upb_table *t, upb_tabent *e) {
  1552. upb_tabent *begin = mutable_entries(t);
  1553. upb_tabent *end = begin + upb_table_size(t);
  1554. for (e = e + 1; e < end; e++) {
  1555. if (upb_tabent_isempty(e)) return e;
  1556. }
  1557. for (e = begin; e < end; e++) {
  1558. if (upb_tabent_isempty(e)) return e;
  1559. }
  1560. UPB_ASSERT(false);
  1561. return NULL;
  1562. }
  1563. static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
  1564. return (upb_tabent*)upb_getentry(t, hash);
  1565. }
  1566. static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
  1567. uint32_t hash, eqlfunc_t *eql) {
  1568. const upb_tabent *e;
  1569. if (t->size_lg2 == 0) return NULL;
  1570. e = upb_getentry(t, hash);
  1571. if (upb_tabent_isempty(e)) return NULL;
  1572. while (1) {
  1573. if (eql(e->key, key)) return e;
  1574. if ((e = e->next) == NULL) return NULL;
  1575. }
  1576. }
  1577. static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
  1578. uint32_t hash, eqlfunc_t *eql) {
  1579. return (upb_tabent*)findentry(t, key, hash, eql);
  1580. }
  1581. static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
  1582. uint32_t hash, eqlfunc_t *eql) {
  1583. const upb_tabent *e = findentry(t, key, hash, eql);
  1584. if (e) {
  1585. if (v) {
  1586. _upb_value_setval(v, e->val.val);
  1587. }
  1588. return true;
  1589. } else {
  1590. return false;
  1591. }
  1592. }
  1593. /* The given key must not already exist in the table. */
  1594. static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
  1595. upb_value val, uint32_t hash,
  1596. hashfunc_t *hashfunc, eqlfunc_t *eql) {
  1597. upb_tabent *mainpos_e;
  1598. upb_tabent *our_e;
  1599. UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
  1600. t->count++;
  1601. mainpos_e = getentry_mutable(t, hash);
  1602. our_e = mainpos_e;
  1603. if (upb_tabent_isempty(mainpos_e)) {
  1604. /* Our main position is empty; use it. */
  1605. our_e->next = NULL;
  1606. } else {
  1607. /* Collision. */
  1608. upb_tabent *new_e = emptyent(t, mainpos_e);
  1609. /* Head of collider's chain. */
  1610. upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
  1611. if (chain == mainpos_e) {
  1612. /* Existing ent is in its main position (it has the same hash as us, and
  1613. * is the head of our chain). Insert to new ent and append to this chain. */
  1614. new_e->next = mainpos_e->next;
  1615. mainpos_e->next = new_e;
  1616. our_e = new_e;
  1617. } else {
  1618. /* Existing ent is not in its main position (it is a node in some other
  1619. * chain). This implies that no existing ent in the table has our hash.
  1620. * Evict it (updating its chain) and use its ent for head of our chain. */
  1621. *new_e = *mainpos_e; /* copies next. */
  1622. while (chain->next != mainpos_e) {
  1623. chain = (upb_tabent*)chain->next;
  1624. UPB_ASSERT(chain);
  1625. }
  1626. chain->next = new_e;
  1627. our_e = mainpos_e;
  1628. our_e->next = NULL;
  1629. }
  1630. }
  1631. our_e->key = tabkey;
  1632. our_e->val.val = val.val;
  1633. UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
  1634. }
  1635. static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
  1636. upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
  1637. upb_tabent *chain = getentry_mutable(t, hash);
  1638. if (upb_tabent_isempty(chain)) return false;
  1639. if (eql(chain->key, key)) {
  1640. /* Element to remove is at the head of its chain. */
  1641. t->count--;
  1642. if (val) _upb_value_setval(val, chain->val.val);
  1643. if (removed) *removed = chain->key;
  1644. if (chain->next) {
  1645. upb_tabent *move = (upb_tabent*)chain->next;
  1646. *chain = *move;
  1647. move->key = 0; /* Make the slot empty. */
  1648. } else {
  1649. chain->key = 0; /* Make the slot empty. */
  1650. }
  1651. return true;
  1652. } else {
  1653. /* Element to remove is either in a non-head position or not in the
  1654. * table. */
  1655. while (chain->next && !eql(chain->next->key, key)) {
  1656. chain = (upb_tabent*)chain->next;
  1657. }
  1658. if (chain->next) {
  1659. /* Found element to remove. */
  1660. upb_tabent *rm = (upb_tabent*)chain->next;
  1661. t->count--;
  1662. if (val) _upb_value_setval(val, chain->next->val.val);
  1663. if (removed) *removed = rm->key;
  1664. rm->key = 0; /* Make the slot empty. */
  1665. chain->next = rm->next;
  1666. return true;
  1667. } else {
  1668. /* Element to remove is not in the table. */
  1669. return false;
  1670. }
  1671. }
  1672. }
  1673. static size_t next(const upb_table *t, size_t i) {
  1674. do {
  1675. if (++i >= upb_table_size(t))
  1676. return SIZE_MAX - 1; /* Distinct from -1. */
  1677. } while(upb_tabent_isempty(&t->entries[i]));
  1678. return i;
  1679. }
  1680. static size_t begin(const upb_table *t) {
  1681. return next(t, -1);
  1682. }
  1683. /* upb_strtable ***************************************************************/
  1684. /* A simple "subclass" of upb_table that only adds a hash function for strings. */
  1685. static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
  1686. uint32_t len = (uint32_t) k2.str.len;
  1687. char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1);
  1688. if (str == NULL) return 0;
  1689. memcpy(str, &len, sizeof(uint32_t));
  1690. if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
  1691. str[sizeof(uint32_t) + k2.str.len] = '\0';
  1692. return (uintptr_t)str;
  1693. }
  1694. static uint32_t table_hash(const char *p, size_t n) {
  1695. return wyhash(p, n, 0, _wyp);
  1696. }
  1697. static uint32_t strhash(upb_tabkey key) {
  1698. uint32_t len;
  1699. char *str = upb_tabstr(key, &len);
  1700. return table_hash(str, len);
  1701. }
  1702. static bool streql(upb_tabkey k1, lookupkey_t k2) {
  1703. uint32_t len;
  1704. char *str = upb_tabstr(k1, &len);
  1705. return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0);
  1706. }
  1707. bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype,
  1708. size_t expected_size, upb_alloc *a) {
  1709. UPB_UNUSED(ctype); /* TODO(haberman): rm */
  1710. // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2 denominator.
  1711. size_t need_entries = (expected_size + 1) * 1204 / 1024;
  1712. UPB_ASSERT(need_entries >= expected_size * 0.85);
  1713. int size_lg2 = _upb_lg2ceil(need_entries);
  1714. return init(&t->t, size_lg2, a);
  1715. }
  1716. void upb_strtable_clear(upb_strtable *t) {
  1717. size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
  1718. t->t.count = 0;
  1719. memset((char*)t->t.entries, 0, bytes);
  1720. }
  1721. void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) {
  1722. size_t i;
  1723. for (i = 0; i < upb_table_size(&t->t); i++)
  1724. upb_free(a, (void*)t->t.entries[i].key);
  1725. uninit(&t->t, a);
  1726. }
  1727. bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) {
  1728. upb_strtable new_table;
  1729. upb_strtable_iter i;
  1730. if (!init(&new_table.t, size_lg2, a))
  1731. return false;
  1732. upb_strtable_begin(&i, t);
  1733. for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
  1734. upb_strview key = upb_strtable_iter_key(&i);
  1735. upb_strtable_insert3(
  1736. &new_table, key.data, key.size,
  1737. upb_strtable_iter_value(&i), a);
  1738. }
  1739. upb_strtable_uninit2(t, a);
  1740. *t = new_table;
  1741. return true;
  1742. }
  1743. bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
  1744. upb_value v, upb_alloc *a) {
  1745. lookupkey_t key;
  1746. upb_tabkey tabkey;
  1747. uint32_t hash;
  1748. if (isfull(&t->t)) {
  1749. /* Need to resize. New table of double the size, add old elements to it. */
  1750. if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
  1751. return false;
  1752. }
  1753. }
  1754. key = strkey2(k, len);
  1755. tabkey = strcopy(key, a);
  1756. if (tabkey == 0) return false;
  1757. hash = table_hash(key.str.str, key.str.len);
  1758. insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
  1759. return true;
  1760. }
  1761. bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
  1762. upb_value *v) {
  1763. uint32_t hash = table_hash(key, len);
  1764. return lookup(&t->t, strkey2(key, len), v, hash, &streql);
  1765. }
  1766. bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
  1767. upb_value *val, upb_alloc *alloc) {
  1768. uint32_t hash = table_hash(key, len);
  1769. upb_tabkey tabkey;
  1770. if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
  1771. if (alloc) {
  1772. /* Arena-based allocs don't need to free and won't pass this. */
  1773. upb_free(alloc, (void*)tabkey);
  1774. }
  1775. return true;
  1776. } else {
  1777. return false;
  1778. }
  1779. }
  1780. /* Iteration */
  1781. void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
  1782. i->t = t;
  1783. i->index = begin(&t->t);
  1784. }
  1785. void upb_strtable_next(upb_strtable_iter *i) {
  1786. i->index = next(&i->t->t, i->index);
  1787. }
  1788. bool upb_strtable_done(const upb_strtable_iter *i) {
  1789. if (!i->t) return true;
  1790. return i->index >= upb_table_size(&i->t->t) ||
  1791. upb_tabent_isempty(str_tabent(i));
  1792. }
  1793. upb_strview upb_strtable_iter_key(const upb_strtable_iter *i) {
  1794. upb_strview key;
  1795. uint32_t len;
  1796. UPB_ASSERT(!upb_strtable_done(i));
  1797. key.data = upb_tabstr(str_tabent(i)->key, &len);
  1798. key.size = len;
  1799. return key;
  1800. }
  1801. upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
  1802. UPB_ASSERT(!upb_strtable_done(i));
  1803. return _upb_value_val(str_tabent(i)->val.val);
  1804. }
  1805. void upb_strtable_iter_setdone(upb_strtable_iter *i) {
  1806. i->t = NULL;
  1807. i->index = SIZE_MAX;
  1808. }
  1809. bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
  1810. const upb_strtable_iter *i2) {
  1811. if (upb_strtable_done(i1) && upb_strtable_done(i2))
  1812. return true;
  1813. return i1->t == i2->t && i1->index == i2->index;
  1814. }
  1815. /* upb_inttable ***************************************************************/
  1816. /* For inttables we use a hybrid structure where small keys are kept in an
  1817. * array and large keys are put in the hash table. */
  1818. static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
  1819. static bool inteql(upb_tabkey k1, lookupkey_t k2) {
  1820. return k1 == k2.num;
  1821. }
  1822. static upb_tabval *mutable_array(upb_inttable *t) {
  1823. return (upb_tabval*)t->array;
  1824. }
  1825. static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
  1826. if (key < t->array_size) {
  1827. return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
  1828. } else {
  1829. upb_tabent *e =
  1830. findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
  1831. return e ? &e->val : NULL;
  1832. }
  1833. }
  1834. static const upb_tabval *inttable_val_const(const upb_inttable *t,
  1835. uintptr_t key) {
  1836. return inttable_val((upb_inttable*)t, key);
  1837. }
  1838. size_t upb_inttable_count(const upb_inttable *t) {
  1839. return t->t.count + t->array_count;
  1840. }
  1841. static void check(upb_inttable *t) {
  1842. UPB_UNUSED(t);
  1843. #if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  1844. {
  1845. /* This check is very expensive (makes inserts/deletes O(N)). */
  1846. size_t count = 0;
  1847. upb_inttable_iter i;
  1848. upb_inttable_begin(&i, t);
  1849. for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
  1850. UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL));
  1851. }
  1852. UPB_ASSERT(count == upb_inttable_count(t));
  1853. }
  1854. #endif
  1855. }
  1856. bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2,
  1857. upb_alloc *a) {
  1858. size_t array_bytes;
  1859. if (!init(&t->t, hsize_lg2, a)) return false;
  1860. /* Always make the array part at least 1 long, so that we know key 0
  1861. * won't be in the hash part, which simplifies things. */
  1862. t->array_size = UPB_MAX(1, asize);
  1863. t->array_count = 0;
  1864. array_bytes = t->array_size * sizeof(upb_value);
  1865. t->array = upb_malloc(a, array_bytes);
  1866. if (!t->array) {
  1867. uninit(&t->t, a);
  1868. return false;
  1869. }
  1870. memset(mutable_array(t), 0xff, array_bytes);
  1871. check(t);
  1872. return true;
  1873. }
  1874. bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) {
  1875. UPB_UNUSED(ctype); /* TODO(haberman): rm */
  1876. return upb_inttable_sizedinit(t, 0, 4, a);
  1877. }
  1878. void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) {
  1879. uninit(&t->t, a);
  1880. upb_free(a, mutable_array(t));
  1881. }
  1882. bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
  1883. upb_alloc *a) {
  1884. upb_tabval tabval;
  1885. tabval.val = val.val;
  1886. UPB_ASSERT(upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */
  1887. if (key < t->array_size) {
  1888. UPB_ASSERT(!upb_arrhas(t->array[key]));
  1889. t->array_count++;
  1890. mutable_array(t)[key].val = val.val;
  1891. } else {
  1892. if (isfull(&t->t)) {
  1893. /* Need to resize the hash part, but we re-use the array part. */
  1894. size_t i;
  1895. upb_table new_table;
  1896. if (!init(&new_table, t->t.size_lg2 + 1, a)) {
  1897. return false;
  1898. }
  1899. for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
  1900. const upb_tabent *e = &t->t.entries[i];
  1901. uint32_t hash;
  1902. upb_value v;
  1903. _upb_value_setval(&v, e->val.val);
  1904. hash = upb_inthash(e->key);
  1905. insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
  1906. }
  1907. UPB_ASSERT(t->t.count == new_table.count);
  1908. uninit(&t->t, a);
  1909. t->t = new_table;
  1910. }
  1911. insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  1912. }
  1913. check(t);
  1914. return true;
  1915. }
  1916. bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
  1917. const upb_tabval *table_v = inttable_val_const(t, key);
  1918. if (!table_v) return false;
  1919. if (v) _upb_value_setval(v, table_v->val);
  1920. return true;
  1921. }
  1922. bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
  1923. upb_tabval *table_v = inttable_val(t, key);
  1924. if (!table_v) return false;
  1925. table_v->val = val.val;
  1926. return true;
  1927. }
  1928. bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
  1929. bool success;
  1930. if (key < t->array_size) {
  1931. if (upb_arrhas(t->array[key])) {
  1932. upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
  1933. t->array_count--;
  1934. if (val) {
  1935. _upb_value_setval(val, t->array[key].val);
  1936. }
  1937. mutable_array(t)[key] = empty;
  1938. success = true;
  1939. } else {
  1940. success = false;
  1941. }
  1942. } else {
  1943. success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
  1944. }
  1945. check(t);
  1946. return success;
  1947. }
  1948. bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
  1949. upb_alloc *a) {
  1950. return upb_inttable_insert2(t, (uintptr_t)key, val, a);
  1951. }
  1952. bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
  1953. upb_value *v) {
  1954. return upb_inttable_lookup(t, (uintptr_t)key, v);
  1955. }
  1956. bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
  1957. return upb_inttable_remove(t, (uintptr_t)key, val);
  1958. }
  1959. void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) {
  1960. /* A power-of-two histogram of the table keys. */
  1961. size_t counts[UPB_MAXARRSIZE + 1] = {0};
  1962. /* The max key in each bucket. */
  1963. uintptr_t max[UPB_MAXARRSIZE + 1] = {0};
  1964. upb_inttable_iter i;
  1965. size_t arr_count;
  1966. int size_lg2;
  1967. upb_inttable new_t;
  1968. upb_inttable_begin(&i, t);
  1969. for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
  1970. uintptr_t key = upb_inttable_iter_key(&i);
  1971. int bucket = log2ceil(key);
  1972. max[bucket] = UPB_MAX(max[bucket], key);
  1973. counts[bucket]++;
  1974. }
  1975. /* Find the largest power of two that satisfies the MIN_DENSITY
  1976. * definition (while actually having some keys). */
  1977. arr_count = upb_inttable_count(t);
  1978. for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
  1979. if (counts[size_lg2] == 0) {
  1980. /* We can halve again without losing any entries. */
  1981. continue;
  1982. } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
  1983. break;
  1984. }
  1985. arr_count -= counts[size_lg2];
  1986. }
  1987. UPB_ASSERT(arr_count <= upb_inttable_count(t));
  1988. {
  1989. /* Insert all elements into new, perfectly-sized table. */
  1990. size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
  1991. size_t hash_count = upb_inttable_count(t) - arr_count;
  1992. size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
  1993. int hashsize_lg2 = log2ceil(hash_size);
  1994. upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a);
  1995. upb_inttable_begin(&i, t);
  1996. for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
  1997. uintptr_t k = upb_inttable_iter_key(&i);
  1998. upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a);
  1999. }
  2000. UPB_ASSERT(new_t.array_size == arr_size);
  2001. UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
  2002. }
  2003. upb_inttable_uninit2(t, a);
  2004. *t = new_t;
  2005. }
  2006. /* Iteration. */
  2007. static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
  2008. UPB_ASSERT(!i->array_part);
  2009. return &i->t->t.entries[i->index];
  2010. }
  2011. static upb_tabval int_arrent(const upb_inttable_iter *i) {
  2012. UPB_ASSERT(i->array_part);
  2013. return i->t->array[i->index];
  2014. }
  2015. void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
  2016. i->t = t;
  2017. i->index = -1;
  2018. i->array_part = true;
  2019. upb_inttable_next(i);
  2020. }
  2021. void upb_inttable_next(upb_inttable_iter *iter) {
  2022. const upb_inttable *t = iter->t;
  2023. if (iter->array_part) {
  2024. while (++iter->index < t->array_size) {
  2025. if (upb_arrhas(int_arrent(iter))) {
  2026. return;
  2027. }
  2028. }
  2029. iter->array_part = false;
  2030. iter->index = begin(&t->t);
  2031. } else {
  2032. iter->index = next(&t->t, iter->index);
  2033. }
  2034. }
  2035. bool upb_inttable_done(const upb_inttable_iter *i) {
  2036. if (!i->t) return true;
  2037. if (i->array_part) {
  2038. return i->index >= i->t->array_size ||
  2039. !upb_arrhas(int_arrent(i));
  2040. } else {
  2041. return i->index >= upb_table_size(&i->t->t) ||
  2042. upb_tabent_isempty(int_tabent(i));
  2043. }
  2044. }
  2045. uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
  2046. UPB_ASSERT(!upb_inttable_done(i));
  2047. return i->array_part ? i->index : int_tabent(i)->key;
  2048. }
  2049. upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
  2050. UPB_ASSERT(!upb_inttable_done(i));
  2051. return _upb_value_val(
  2052. i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val);
  2053. }
  2054. void upb_inttable_iter_setdone(upb_inttable_iter *i) {
  2055. i->t = NULL;
  2056. i->index = SIZE_MAX;
  2057. i->array_part = false;
  2058. }
  2059. bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
  2060. const upb_inttable_iter *i2) {
  2061. if (upb_inttable_done(i1) && upb_inttable_done(i2))
  2062. return true;
  2063. return i1->t == i2->t && i1->index == i2->index &&
  2064. i1->array_part == i2->array_part;
  2065. }
  2066. #include <errno.h>
  2067. #include <stdarg.h>
  2068. #include <stddef.h>
  2069. #include <stdint.h>
  2070. #include <stdio.h>
  2071. #include <stdlib.h>
  2072. #include <string.h>
  2073. /* upb_status *****************************************************************/
  2074. void upb_status_clear(upb_status *status) {
  2075. if (!status) return;
  2076. status->ok = true;
  2077. status->msg[0] = '\0';
  2078. }
  2079. bool upb_ok(const upb_status *status) { return status->ok; }
  2080. const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
  2081. void upb_status_seterrmsg(upb_status *status, const char *msg) {
  2082. if (!status) return;
  2083. status->ok = false;
  2084. strncpy(status->msg, msg, UPB_STATUS_MAX_MESSAGE - 1);
  2085. status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0';
  2086. }
  2087. void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
  2088. va_list args;
  2089. va_start(args, fmt);
  2090. upb_status_vseterrf(status, fmt, args);
  2091. va_end(args);
  2092. }
  2093. void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
  2094. if (!status) return;
  2095. status->ok = false;
  2096. vsnprintf(status->msg, sizeof(status->msg), fmt, args);
  2097. status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0';
  2098. }
  2099. void upb_status_vappenderrf(upb_status *status, const char *fmt, va_list args) {
  2100. size_t len;
  2101. if (!status) return;
  2102. status->ok = false;
  2103. len = strlen(status->msg);
  2104. vsnprintf(status->msg + len, sizeof(status->msg) - len, fmt, args);
  2105. status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0';
  2106. }
  2107. /* upb_alloc ******************************************************************/
  2108. static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize,
  2109. size_t size) {
  2110. UPB_UNUSED(alloc);
  2111. UPB_UNUSED(oldsize);
  2112. if (size == 0) {
  2113. free(ptr);
  2114. return NULL;
  2115. } else {
  2116. return realloc(ptr, size);
  2117. }
  2118. }
/* Process-wide allocator whose callback is upb_global_allocfunc(). */
upb_alloc upb_alloc_global = {&upb_global_allocfunc};
  2120. /* upb_arena ******************************************************************/
/* Block headers are padded to a multiple of 16 bytes (see memblock_reserve).
 * Be conservative and choose 16 in case anyone is using SSE. */
/* Header placed at the start of every block an arena allocates. */
struct mem_block {
  struct mem_block *next;  /* Next block in the owning root's freelist. */
  uint32_t size;           /* Total block size in bytes, header included. */
  uint32_t cleanups;       /* Number of cleanup_ent records stored at the end
                            * of the block. */
  /* Data follows. */
};
/* One registered cleanup: arena_dofree() calls cleanup(ud) for each record
 * before freeing the block that holds it. */
typedef struct cleanup_ent {
  upb_cleanup_func *cleanup;  /* Function to invoke. */
  void *ud;                   /* Argument passed to |cleanup|. */
} cleanup_ent;
/* Bytes reserved at the start of each block for the mem_block header,
 * rounded up to a multiple of 16; user data begins at this offset. */
static const size_t memblock_reserve = UPB_ALIGN_UP(sizeof(mem_block), 16);
  2133. static upb_arena *arena_findroot(upb_arena *a) {
  2134. /* Path splitting keeps time complexity down, see:
  2135. * https://en.wikipedia.org/wiki/Disjoint-set_data_structure */
  2136. while (a->parent != a) {
  2137. upb_arena *next = a->parent;
  2138. a->parent = next->parent;
  2139. a = next;
  2140. }
  2141. return a;
  2142. }
  2143. static void upb_arena_addblock(upb_arena *a, upb_arena *root, void *ptr,
  2144. size_t size) {
  2145. mem_block *block = ptr;
  2146. /* The block is for arena |a|, but should appear in the freelist of |root|. */
  2147. block->next = root->freelist;
  2148. block->size = (uint32_t)size;
  2149. block->cleanups = 0;
  2150. root->freelist = block;
  2151. a->last_size = block->size;
  2152. if (!root->freelist_tail) root->freelist_tail = block;
  2153. a->head.ptr = UPB_PTR_AT(block, memblock_reserve, char);
  2154. a->head.end = UPB_PTR_AT(block, size, char);
  2155. a->cleanups = &block->cleanups;
  2156. UPB_POISON_MEMORY_REGION(a->head.ptr, a->head.end - a->head.ptr);
  2157. }
  2158. static bool upb_arena_allocblock(upb_arena *a, size_t size) {
  2159. upb_arena *root = arena_findroot(a);
  2160. size_t block_size = UPB_MAX(size, a->last_size * 2) + memblock_reserve;
  2161. mem_block *block = upb_malloc(root->block_alloc, block_size);
  2162. if (!block) return false;
  2163. upb_arena_addblock(a, root, block, block_size);
  2164. return true;
  2165. }
  2166. void *_upb_arena_slowmalloc(upb_arena *a, size_t size) {
  2167. if (!upb_arena_allocblock(a, size)) return NULL; /* Out of memory. */
  2168. UPB_ASSERT(_upb_arenahas(a) >= size);
  2169. return upb_arena_malloc(a, size);
  2170. }
  2171. static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize,
  2172. size_t size) {
  2173. upb_arena *a = (upb_arena*)alloc; /* upb_alloc is initial member. */
  2174. return upb_arena_realloc(a, ptr, oldsize, size);
  2175. }
  2176. /* Public Arena API ***********************************************************/
  2177. upb_arena *arena_initslow(void *mem, size_t n, upb_alloc *alloc) {
  2178. const size_t first_block_overhead = sizeof(upb_arena) + memblock_reserve;
  2179. upb_arena *a;
  2180. /* We need to malloc the initial block. */
  2181. n = first_block_overhead + 256;
  2182. if (!alloc || !(mem = upb_malloc(alloc, n))) {
  2183. return NULL;
  2184. }
  2185. a = UPB_PTR_AT(mem, n - sizeof(*a), upb_arena);
  2186. n -= sizeof(*a);
  2187. a->head.alloc.func = &upb_arena_doalloc;
  2188. a->block_alloc = alloc;
  2189. a->parent = a;
  2190. a->refcount = 1;
  2191. a->freelist = NULL;
  2192. a->freelist_tail = NULL;
  2193. upb_arena_addblock(a, a, mem, n);
  2194. return a;
  2195. }
  2196. upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) {
  2197. upb_arena *a;
  2198. /* Round block size down to alignof(*a) since we will allocate the arena
  2199. * itself at the end. */
  2200. n = UPB_ALIGN_DOWN(n, UPB_ALIGN_OF(upb_arena));
  2201. if (UPB_UNLIKELY(n < sizeof(upb_arena))) {
  2202. return arena_initslow(mem, n, alloc);
  2203. }
  2204. a = UPB_PTR_AT(mem, n - sizeof(*a), upb_arena);
  2205. a->head.alloc.func = &upb_arena_doalloc;
  2206. a->block_alloc = alloc;
  2207. a->parent = a;
  2208. a->refcount = 1;
  2209. a->last_size = UPB_MAX(128, n);
  2210. a->head.ptr = mem;
  2211. a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char);
  2212. a->freelist = NULL;
  2213. a->cleanups = NULL;
  2214. return a;
  2215. }
  2216. static void arena_dofree(upb_arena *a) {
  2217. mem_block *block = a->freelist;
  2218. UPB_ASSERT(a->parent == a);
  2219. UPB_ASSERT(a->refcount == 0);
  2220. while (block) {
  2221. /* Load first since we are deleting block. */
  2222. mem_block *next = block->next;
  2223. if (block->cleanups > 0) {
  2224. cleanup_ent *end = UPB_PTR_AT(block, block->size, void);
  2225. cleanup_ent *ptr = end - block->cleanups;
  2226. for (; ptr < end; ptr++) {
  2227. ptr->cleanup(ptr->ud);
  2228. }
  2229. }
  2230. upb_free(a->block_alloc, block);
  2231. block = next;
  2232. }
  2233. }
  2234. void upb_arena_free(upb_arena *a) {
  2235. a = arena_findroot(a);
  2236. if (--a->refcount == 0) arena_dofree(a);
  2237. }
  2238. bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) {
  2239. cleanup_ent *ent;
  2240. if (!a->cleanups || _upb_arenahas(a) < sizeof(cleanup_ent)) {
  2241. if (!upb_arena_allocblock(a, 128)) return false; /* Out of memory. */
  2242. UPB_ASSERT(_upb_arenahas(a) >= sizeof(cleanup_ent));
  2243. }
  2244. a->head.end -= sizeof(cleanup_ent);
  2245. ent = (cleanup_ent*)a->head.end;
  2246. (*a->cleanups)++;
  2247. UPB_UNPOISON_MEMORY_REGION(ent, sizeof(cleanup_ent));
  2248. ent->cleanup = func;
  2249. ent->ud = ud;
  2250. return true;
  2251. }
  2252. void upb_arena_fuse(upb_arena *a1, upb_arena *a2) {
  2253. upb_arena *r1 = arena_findroot(a1);
  2254. upb_arena *r2 = arena_findroot(a2);
  2255. if (r1 == r2) return; /* Already fused. */
  2256. /* We want to join the smaller tree to the larger tree.
  2257. * So swap first if they are backwards. */
  2258. if (r1->refcount < r2->refcount) {
  2259. upb_arena *tmp = r1;
  2260. r1 = r2;
  2261. r2 = tmp;
  2262. }
  2263. /* r1 takes over r2's freelist and refcount. */
  2264. r1->refcount += r2->refcount;
  2265. if (r2->freelist_tail) {
  2266. UPB_ASSERT(r2->freelist_tail->next == NULL);
  2267. r2->freelist_tail->next = r1->freelist;
  2268. r1->freelist = r2->freelist;
  2269. }
  2270. r2->parent = r1;
  2271. }
  2272. // Fast decoder: ~3x the speed of decode.c, but x86-64 specific.
  2273. // Also the table size grows by 2x.
  2274. //
  2275. // Could potentially be ported to ARM64 or other 64-bit archs that pass at
  2276. // least six arguments in registers.
  2277. //
  2278. // The overall design is to create specialized functions for every possible
  2279. // field type (eg. oneof boolean field with a 1 byte tag) and then dispatch
  2280. // to the specialized function as quickly as possible.
  2281. /* Must be last. */
  2282. #if UPB_FASTTABLE
// The standard set of arguments passed to each parsing function.
// Thanks to x86-64 calling conventions, these will stay in registers.
// UPB_PARSE_PARAMS is the parameter list for a function signature.
#define UPB_PARSE_PARAMS                                          \
  upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, \
      uint64_t hasbits, uint64_t data

// The matching argument list, for forwarding the parameters above unchanged.
#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data

// Bails out of a specialized parser into fastdecode_generic(), passing 0 for
// the data argument. |m| is only used by the commented-out debug print.
#define RETURN_GENERIC(m)   \
  /* fprintf(stderr, m); */ \
  return fastdecode_generic(d, ptr, msg, table, hasbits, 0);
// Field cardinality handled by a specialized parser. The single-letter
// suffixes are the enumerator names' own abbreviations.
typedef enum {
  CARD_s = 0,  /* Singular (optional, non-repeated) */
  CARD_o = 1,  /* Oneof */
  CARD_r = 2,  /* Repeated */
  CARD_p = 3   /* Packed Repeated */
} upb_card;
  2298. UPB_NOINLINE
  2299. static const char *fastdecode_isdonefallback(upb_decstate *d, const char *ptr,
  2300. upb_msg *msg, intptr_t table,
  2301. uint64_t hasbits, int overrun) {
  2302. ptr = decode_isdonefallback_inl(d, ptr, overrun);
  2303. if (ptr == NULL) {
  2304. return fastdecode_err(d);
  2305. }
  2306. uint16_t tag = fastdecode_loadtag(ptr);
  2307. return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag);
  2308. }
  2309. UPB_FORCEINLINE
  2310. static const char *fastdecode_dispatch(upb_decstate *d, const char *ptr,
  2311. upb_msg *msg, intptr_t table,
  2312. uint64_t hasbits) {
  2313. if (UPB_UNLIKELY(ptr >= d->limit_ptr)) {
  2314. int overrun = ptr - d->end;
  2315. if (UPB_LIKELY(overrun == d->limit)) {
  2316. // Parse is finished.
  2317. *(uint32_t*)msg |= hasbits; // Sync hasbits.
  2318. return ptr;
  2319. } else {
  2320. return fastdecode_isdonefallback(d, ptr, msg, table, hasbits, overrun);
  2321. }
  2322. }
  2323. // Read two bytes of tag data (for a one-byte tag, the high byte is junk).
  2324. uint16_t tag = fastdecode_loadtag(ptr);
  2325. return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag);
  2326. }
  2327. UPB_FORCEINLINE
  2328. static bool fastdecode_checktag(uint64_t data, int tagbytes) {
  2329. if (tagbytes == 1) {
  2330. return (data & 0xff) == 0;
  2331. } else {
  2332. return (data & 0xffff) == 0;
  2333. }
  2334. }
  2335. UPB_FORCEINLINE
  2336. static const char *fastdecode_longsize(const char *ptr, int *size) {
  2337. int i;
  2338. UPB_ASSERT(*size & 0x80);
  2339. *size &= 0xff;
  2340. for (i = 0; i < 3; i++) {
  2341. ptr++;
  2342. size_t byte = (uint8_t)ptr[-1];
  2343. *size += (byte - 1) << (7 + 7 * i);
  2344. if (UPB_LIKELY((byte & 0x80) == 0)) return ptr;
  2345. }
  2346. ptr++;
  2347. size_t byte = (uint8_t)ptr[-1];
  2348. // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected
  2349. // for a 32 bit varint.
  2350. if (UPB_UNLIKELY(byte >= 8)) return NULL;
  2351. *size += (byte - 1) << 28;
  2352. return ptr;
  2353. }
  2354. UPB_FORCEINLINE
  2355. static bool fastdecode_boundscheck(const char *ptr, size_t len,
  2356. const char *end) {
  2357. uintptr_t uptr = (uintptr_t)ptr;
  2358. uintptr_t uend = (uintptr_t)end + 16;
  2359. uintptr_t res = uptr + len;
  2360. return res < uptr || res > uend;
  2361. }
  2362. UPB_FORCEINLINE
  2363. static bool fastdecode_boundscheck2(const char *ptr, size_t len,
  2364. const char *end) {
  2365. // This is one extra branch compared to the more normal:
  2366. // return (size_t)(end - ptr) < size;
  2367. // However it is one less computation if we are just about to use "ptr + len":
  2368. // https://godbolt.org/z/35YGPz
  2369. // In microbenchmarks this shows an overall 4% improvement.
  2370. uintptr_t uptr = (uintptr_t)ptr;
  2371. uintptr_t uend = (uintptr_t)end;
  2372. uintptr_t res = uptr + len;
  2373. return res < uptr || res > uend;
  2374. }
// Callback that parses the body of a length-delimited field. It receives the
// decoder state, the pointer to the start of the body, and the opaque |ctx|
// given to fastdecode_delimited(), and returns the updated read pointer.
typedef const char *fastdecode_delimfunc(upb_decstate *d, const char *ptr,
                                         void *ctx);
// Parses one length-delimited region: reads the length prefix at |ptr|,
// narrows the decoder's limit to that region, calls func(d, ptr, ctx) on the
// body, and restores the limit afterwards.
//
// Returns whatever |func| returns, or NULL when the length varint is rejected
// by fastdecode_longsize() or the region would extend past the current limit.
UPB_FORCEINLINE
static const char *fastdecode_delimited(upb_decstate *d, const char *ptr,
                                        fastdecode_delimfunc *func, void *ctx) {
  ptr++;
  // The int8_t cast makes a first byte with its continuation bit set negative;
  // converted to size_t below it becomes huge and forces the slow path.
  int len = (int8_t)ptr[-1];
  if (fastdecode_boundscheck2(ptr, len, d->limit_ptr)) {
    // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer.
    // If it exceeds the buffer limit, limit/limit_ptr will change during
    // sub-message parsing, so we need to preserve delta, not limit.
    if (UPB_UNLIKELY(len & 0x80)) {
      // Size varint >1 byte (length >= 128).
      ptr = fastdecode_longsize(ptr, &len);
      if (!ptr) {
        // Corrupt wire format: size exceeded INT_MAX.
        return NULL;
      }
    }
    if (ptr - d->end + (int)len > d->limit) {
      // Corrupt wire format: invalid limit.
      return NULL;
    }
    // The limit must be popped with the delta returned by the push.
    int delta = decode_pushlimit(d, ptr, len);
    ptr = func(d, ptr, ctx);
    decode_poplimit(d, ptr, delta);
  } else {
    // Fast case: Sub-message is <128 bytes and fits in the current buffer.
    // This means we can preserve limit/limit_ptr verbatim.
    const char *saved_limit_ptr = d->limit_ptr;
    int saved_limit = d->limit;
    d->limit_ptr = ptr + len;
    d->limit = d->limit_ptr - d->end;
    UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
    ptr = func(d, ptr, ctx);
    // Restore the enclosing region's limit exactly as it was saved.
    d->limit_ptr = saved_limit_ptr;
    d->limit = saved_limit;
    UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
  }
  return ptr;
}
  2416. /* singular, oneof, repeated field handling ***********************************/
// Working state for appending to a repeated field's array.
typedef struct {
  upb_array *arr;  // The array being filled.
  void *end;       // One past the array's current capacity; reaching it
                   // triggers growth in fastdecode_resizearr().
} fastdecode_arr;
// What follows the repeated element just parsed (see
// fastdecode_nextrepeated()).
typedef enum {
  FD_NEXT_ATLIMIT,     // The decoder reported done (decode_isdone()).
  FD_NEXT_SAMEFIELD,   // The next tag matches the current field.
  FD_NEXT_OTHERFIELD   // The next tag belongs to a different field.
} fastdecode_next;
// Result of fastdecode_nextrepeated().
typedef struct {
  void *dst;             // Destination slot for the next element.
  fastdecode_next next;  // What follows in the input.
  uint32_t tag;          // Next tag; only assigned when the decoder was not
                         // done.
} fastdecode_nextret;
  2431. UPB_FORCEINLINE
  2432. static void *fastdecode_resizearr(upb_decstate *d, void *dst,
  2433. fastdecode_arr *farr, int valbytes) {
  2434. if (UPB_UNLIKELY(dst == farr->end)) {
  2435. size_t old_size = farr->arr->size;
  2436. size_t old_bytes = old_size * valbytes;
  2437. size_t new_size = old_size * 2;
  2438. size_t new_bytes = new_size * valbytes;
  2439. char *old_ptr = _upb_array_ptr(farr->arr);
  2440. char *new_ptr = upb_arena_realloc(&d->arena, old_ptr, old_bytes, new_bytes);
  2441. uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
  2442. farr->arr->size = new_size;
  2443. farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2);
  2444. dst = (void*)(new_ptr + (old_size * valbytes));
  2445. farr->end = (void*)(new_ptr + (new_size * valbytes));
  2446. }
  2447. return dst;
  2448. }
  2449. UPB_FORCEINLINE
  2450. static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) {
  2451. if (tagbytes == 1) {
  2452. return (uint8_t)tag == (uint8_t)data;
  2453. } else {
  2454. return (uint16_t)tag == (uint16_t)data;
  2455. }
  2456. }
  2457. UPB_FORCEINLINE
  2458. static void fastdecode_commitarr(void *dst, fastdecode_arr *farr,
  2459. int valbytes) {
  2460. farr->arr->len =
  2461. (size_t)((char *)dst - (char *)_upb_array_ptr(farr->arr)) / valbytes;
  2462. }
  2463. UPB_FORCEINLINE
  2464. static fastdecode_nextret fastdecode_nextrepeated(upb_decstate *d, void *dst,
  2465. const char **ptr,
  2466. fastdecode_arr *farr,
  2467. uint64_t data, int tagbytes,
  2468. int valbytes) {
  2469. fastdecode_nextret ret;
  2470. dst = (char *)dst + valbytes;
  2471. if (UPB_LIKELY(!decode_isdone(d, ptr))) {
  2472. ret.tag = fastdecode_loadtag(*ptr);
  2473. if (fastdecode_tagmatch(ret.tag, data, tagbytes)) {
  2474. ret.next = FD_NEXT_SAMEFIELD;
  2475. } else {
  2476. fastdecode_commitarr(dst, farr, valbytes);
  2477. ret.next = FD_NEXT_OTHERFIELD;
  2478. }
  2479. } else {
  2480. fastdecode_commitarr(dst, farr, valbytes);
  2481. ret.next = FD_NEXT_ATLIMIT;
  2482. }
  2483. ret.dst = dst;
  2484. return ret;
  2485. }
  2486. UPB_FORCEINLINE
  2487. static void *fastdecode_fieldmem(upb_msg *msg, uint64_t data) {
  2488. size_t ofs = data >> 48;
  2489. return (char *)msg + ofs;
  2490. }
  2491. UPB_FORCEINLINE
  2492. static void *fastdecode_getfield(upb_decstate *d, const char *ptr, upb_msg *msg,
  2493. uint64_t *data, uint64_t *hasbits,
  2494. fastdecode_arr *farr, int valbytes,
  2495. upb_card card) {
  2496. switch (card) {
  2497. case CARD_s: {
  2498. uint8_t hasbit_index = *data >> 24;
  2499. // Set hasbit and return pointer to scalar field.
  2500. *hasbits |= 1ull << hasbit_index;
  2501. return fastdecode_fieldmem(msg, *data);
  2502. }
  2503. case CARD_o: {
  2504. uint16_t case_ofs = *data >> 32;
  2505. uint32_t *oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t);
  2506. uint8_t field_number = *data >> 24;
  2507. *oneof_case = field_number;
  2508. return fastdecode_fieldmem(msg, *data);
  2509. }
  2510. case CARD_r: {
  2511. // Get pointer to upb_array and allocate/expand if necessary.
  2512. uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
  2513. upb_array **arr_p = fastdecode_fieldmem(msg, *data);
  2514. char *begin;
  2515. *(uint32_t*)msg |= *hasbits;
  2516. *hasbits = 0;
  2517. if (UPB_LIKELY(!*arr_p)) {
  2518. farr->arr = _upb_array_new(&d->arena, 8, elem_size_lg2);
  2519. *arr_p = farr->arr;
  2520. } else {
  2521. farr->arr = *arr_p;
  2522. }
  2523. begin = _upb_array_ptr(farr->arr);
  2524. farr->end = begin + (farr->arr->size * valbytes);
  2525. *data = fastdecode_loadtag(ptr);
  2526. return begin + (farr->arr->len * valbytes);
  2527. }
  2528. default:
  2529. UPB_UNREACHABLE();
  2530. }
  2531. }
  2532. UPB_FORCEINLINE
  2533. static bool fastdecode_flippacked(uint64_t *data, int tagbytes) {
  2534. *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype.
  2535. return fastdecode_checktag(*data, tagbytes);
  2536. }
  /* varint fields **************************************************************/

  /* Converts a raw decoded varint into the value stored in the field.
   *
   *   valbytes == 1 : normalized to 0 or 1 (takes precedence over `zigzag`).
   *   zigzag        : zigzag-decoded, on the low 32 bits when valbytes == 4 or
   *                   on all 64 bits when valbytes == 8; other sizes are
   *                   unreachable.
   *   otherwise     : returned unchanged. */
  UPB_FORCEINLINE
  static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) {
    if (valbytes == 1) {
      return val != 0;
    } else if (zigzag) {
      if (valbytes == 4) {
        uint32_t n = val;
        return (n >> 1) ^ -(int32_t)(n & 1);
      } else if (valbytes == 8) {
        return (val >> 1) ^ -(int64_t)(val & 1);
      }
      UPB_UNREACHABLE();
    }
    return val;
  }
  /* Decodes one base-128 varint of up to 10 bytes starting at `ptr`.
   *
   * Stores the value in *val and returns the address just past the varint.
   * Returns NULL if the tenth byte is greater than 1, i.e. the encoding would
   * not fit in 64 bits; *val is then left partially accumulated.
   *
   * No bounds checking is done here: up to 10 bytes are read from `ptr`.
   *
   * Each continuation byte is added as (byte - 1) << shift: the "- 1" cancels
   * the continuation bit (0x80) that the previous byte left set at that
   * position, so the continuation bits never need to be masked off. */
  UPB_FORCEINLINE
  static const char *fastdecode_varint64(const char *ptr, uint64_t *val) {
    ptr++;
    *val = (uint8_t)ptr[-1];
    if (UPB_UNLIKELY(*val & 0x80)) {
      int i;
      // Bytes 2..9.
      for (i = 0; i < 8; i++) {
        ptr++;
        uint64_t byte = (uint8_t)ptr[-1];
        *val += (byte - 1) << (7 + 7 * i);
        if (UPB_LIKELY((byte & 0x80) == 0)) goto done;
      }
      // Byte 10 may only contribute the single remaining bit (bit 63).
      ptr++;
      uint64_t byte = (uint8_t)ptr[-1];
      if (byte > 1) {
        return NULL;
      }
      *val += (byte - 1) << 63;
    }
  done:
    UPB_ASSUME(ptr != NULL);
    return ptr;
  }
  2576. UPB_FORCEINLINE
  2577. static const char *fastdecode_unpackedvarint(UPB_PARSE_PARAMS, int tagbytes,
  2578. int valbytes, upb_card card,
  2579. bool zigzag,
  2580. _upb_field_parser *packed) {
  2581. uint64_t val;
  2582. void *dst;
  2583. fastdecode_arr farr;
  2584. if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
  2585. if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) {
  2586. return packed(UPB_PARSE_ARGS);
  2587. }
  2588. RETURN_GENERIC("varint field tag mismatch\n");
  2589. }
  2590. dst =
  2591. fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, card);
  2592. if (card == CARD_r) {
  2593. if (UPB_UNLIKELY(!dst)) {
  2594. RETURN_GENERIC("need array resize\n");
  2595. }
  2596. }
  2597. again:
  2598. if (card == CARD_r) {
  2599. dst = fastdecode_resizearr(d, dst, &farr, valbytes);
  2600. }
  2601. ptr += tagbytes;
  2602. ptr = fastdecode_varint64(ptr, &val);
  2603. if (ptr == NULL) return fastdecode_err(d);
  2604. val = fastdecode_munge(val, valbytes, zigzag);
  2605. memcpy(dst, &val, valbytes);
  2606. if (card == CARD_r) {
  2607. fastdecode_nextret ret =
  2608. fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes);
  2609. switch (ret.next) {
  2610. case FD_NEXT_SAMEFIELD:
  2611. dst = ret.dst;
  2612. goto again;
  2613. case FD_NEXT_OTHERFIELD:
  2614. return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag);
  2615. case FD_NEXT_ATLIMIT:
  2616. return ptr;
  2617. }
  2618. }
  2619. return fastdecode_dispatch(d, ptr, msg, table, hasbits);
  2620. }
  2621. typedef struct {
  2622. uint8_t valbytes;
  2623. bool zigzag;
  2624. void *dst;
  2625. fastdecode_arr farr;
  2626. } fastdecode_varintdata;
  2627. UPB_FORCEINLINE
  2628. static const char *fastdecode_topackedvarint(upb_decstate *d, const char *ptr,
  2629. void *ctx) {
  2630. fastdecode_varintdata *data = ctx;
  2631. void *dst = data->dst;
  2632. uint64_t val;
  2633. while (!decode_isdone(d, &ptr)) {
  2634. dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes);
  2635. ptr = fastdecode_varint64(ptr, &val);
  2636. if (ptr == NULL) return NULL;
  2637. val = fastdecode_munge(val, data->valbytes, data->zigzag);
  2638. memcpy(dst, &val, data->valbytes);
  2639. dst = (char *)dst + data->valbytes;
  2640. }
  2641. fastdecode_commitarr(dst, &data->farr, data->valbytes);
  2642. return ptr;
  2643. }
  2644. UPB_FORCEINLINE
  2645. static const char *fastdecode_packedvarint(UPB_PARSE_PARAMS, int tagbytes,
  2646. int valbytes, bool zigzag,
  2647. _upb_field_parser *unpacked) {
  2648. fastdecode_varintdata ctx = {valbytes, zigzag};
  2649. if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
  2650. if (fastdecode_flippacked(&data, tagbytes)) {
  2651. return unpacked(UPB_PARSE_ARGS);
  2652. } else {
  2653. RETURN_GENERIC("varint field tag mismatch\n");
  2654. }
  2655. }
  2656. ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr,
  2657. valbytes, CARD_r);
  2658. if (UPB_UNLIKELY(!ctx.dst)) {
  2659. RETURN_GENERIC("need array resize\n");
  2660. }
  2661. ptr += tagbytes;
  2662. ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx);
  2663. if (UPB_UNLIKELY(ptr == NULL)) {
  2664. return fastdecode_err(d);
  2665. }
  2666. return fastdecode_dispatch(d, ptr, msg, table, hasbits);
  2667. }
  2668. UPB_FORCEINLINE
  2669. static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes,
  2670. int valbytes, upb_card card, bool zigzag,
  2671. _upb_field_parser *unpacked,
  2672. _upb_field_parser *packed) {
  2673. if (card == CARD_p) {
  2674. return fastdecode_packedvarint(UPB_PARSE_ARGS, tagbytes, valbytes, zigzag,
  2675. unpacked);
  2676. } else {
  2677. return fastdecode_unpackedvarint(UPB_PARSE_ARGS, tagbytes, valbytes, card,
  2678. zigzag, packed);
  2679. }
  2680. }
  2681. #define z_ZZ true
  2682. #define b_ZZ false
  2683. #define v_ZZ false
  2684. /* Generate all combinations:
  2685. * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */
  2686. #define F(card, type, valbytes, tagbytes) \
  2687. UPB_NOINLINE \
  2688. const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
  2689. return fastdecode_varint(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \
  2690. type##_ZZ, \
  2691. &upb_pr##type##valbytes##_##tagbytes##bt, \
  2692. &upb_pp##type##valbytes##_##tagbytes##bt); \
  2693. }
  2694. #define TYPES(card, tagbytes) \
  2695. F(card, b, 1, tagbytes) \
  2696. F(card, v, 4, tagbytes) \
  2697. F(card, v, 8, tagbytes) \
  2698. F(card, z, 4, tagbytes) \
  2699. F(card, z, 8, tagbytes)
  2700. #define TAGBYTES(card) \
  2701. TYPES(card, 1) \
  2702. TYPES(card, 2)
  2703. TAGBYTES(s)
  2704. TAGBYTES(o)
  2705. TAGBYTES(r)
  2706. TAGBYTES(p)
  2707. #undef z_ZZ
  2708. #undef b_ZZ
  2709. #undef v_ZZ
  2710. #undef o_ONEOF
  2711. #undef s_ONEOF
  2712. #undef r_ONEOF
  2713. #undef F
  2714. #undef TYPES
  2715. #undef TAGBYTES
  2716. /* fixed fields ***************************************************************/
  2717. UPB_FORCEINLINE
  2718. static const char *fastdecode_unpackedfixed(UPB_PARSE_PARAMS, int tagbytes,
  2719. int valbytes, upb_card card,
  2720. _upb_field_parser *packed) {
  2721. void *dst;
  2722. fastdecode_arr farr;
  2723. if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
  2724. if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) {
  2725. return packed(UPB_PARSE_ARGS);
  2726. }
  2727. RETURN_GENERIC("fixed field tag mismatch\n");
  2728. }
  2729. dst =
  2730. fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, card);
  2731. if (card == CARD_r) {
  2732. if (UPB_UNLIKELY(!dst)) {
  2733. RETURN_GENERIC("couldn't allocate array in arena\n");
  2734. }
  2735. }
  2736. again:
  2737. if (card == CARD_r) {
  2738. dst = fastdecode_resizearr(d, dst, &farr, valbytes);
  2739. }
  2740. ptr += tagbytes;
  2741. memcpy(dst, ptr, valbytes);
  2742. ptr += valbytes;
  2743. if (card == CARD_r) {
  2744. fastdecode_nextret ret =
  2745. fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes);
  2746. switch (ret.next) {
  2747. case FD_NEXT_SAMEFIELD:
  2748. dst = ret.dst;
  2749. goto again;
  2750. case FD_NEXT_OTHERFIELD:
  2751. return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag);
  2752. case FD_NEXT_ATLIMIT:
  2753. return ptr;
  2754. }
  2755. }
  2756. return fastdecode_dispatch(d, ptr, msg, table, hasbits);
  2757. }
  2758. UPB_FORCEINLINE
  2759. static const char *fastdecode_packedfixed(UPB_PARSE_PARAMS, int tagbytes,
  2760. int valbytes,
  2761. _upb_field_parser *unpacked) {
  2762. if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
  2763. if (fastdecode_flippacked(&data, tagbytes)) {
  2764. return unpacked(UPB_PARSE_ARGS);
  2765. } else {
  2766. RETURN_GENERIC("varint field tag mismatch\n");
  2767. }
  2768. }
  2769. ptr += tagbytes;
  2770. int size = (uint8_t)ptr[0];
  2771. ptr++;
  2772. if (size & 0x80) {
  2773. ptr = fastdecode_longsize(ptr, &size);
  2774. }
  2775. if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr)) ||
  2776. (size % valbytes) != 0) {
  2777. return fastdecode_err(d);
  2778. }
  2779. upb_array **arr_p = fastdecode_fieldmem(msg, data);
  2780. upb_array *arr = *arr_p;
  2781. uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
  2782. int elems = size / valbytes;
  2783. if (UPB_LIKELY(!arr)) {
  2784. *arr_p = arr = _upb_array_new(&d->arena, elems, elem_size_lg2);
  2785. if (!arr) {
  2786. return fastdecode_err(d);
  2787. }
  2788. } else {
  2789. _upb_array_resize(arr, elems, &d->arena);
  2790. }
  2791. char *dst = _upb_array_ptr(arr);
  2792. memcpy(dst, ptr, size);
  2793. arr->len = elems;
  2794. return fastdecode_dispatch(d, ptr + size, msg, table, hasbits);
  2795. }
  2796. UPB_FORCEINLINE
  2797. static const char *fastdecode_fixed(UPB_PARSE_PARAMS, int tagbytes,
  2798. int valbytes, upb_card card,
  2799. _upb_field_parser *unpacked,
  2800. _upb_field_parser *packed) {
  2801. if (card == CARD_p) {
  2802. return fastdecode_packedfixed(UPB_PARSE_ARGS, tagbytes, valbytes, unpacked);
  2803. } else {
  2804. return fastdecode_unpackedfixed(UPB_PARSE_ARGS, tagbytes, valbytes, card,
  2805. packed);
  2806. }
  2807. }
  2808. /* Generate all combinations:
  2809. * {s,o,r,p} x {f4,f8} x {1bt,2bt} */
  2810. #define F(card, valbytes, tagbytes) \
  2811. UPB_NOINLINE \
  2812. const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
  2813. return fastdecode_fixed(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \
  2814. &upb_ppf##valbytes##_##tagbytes##bt, \
  2815. &upb_prf##valbytes##_##tagbytes##bt); \
  2816. }
  2817. #define TYPES(card, tagbytes) \
  2818. F(card, 4, tagbytes) \
  2819. F(card, 8, tagbytes)
  2820. #define TAGBYTES(card) \
  2821. TYPES(card, 1) \
  2822. TYPES(card, 2)
  2823. TAGBYTES(s)
  2824. TAGBYTES(o)
  2825. TAGBYTES(r)
  2826. TAGBYTES(p)
  2827. #undef F
  2828. #undef TYPES
  2829. #undef TAGBYTES
  2830. /* string fields **************************************************************/
  2831. typedef const char *fastdecode_copystr_func(struct upb_decstate *d,
  2832. const char *ptr, upb_msg *msg,
  2833. const upb_msglayout *table,
  2834. uint64_t hasbits, upb_strview *dst);
  2835. UPB_NOINLINE
  2836. static const char *fastdecode_verifyutf8(upb_decstate *d, const char *ptr,
  2837. upb_msg *msg, intptr_t table,
  2838. uint64_t hasbits, upb_strview *dst) {
  2839. if (!decode_verifyutf8_inl(dst->data, dst->size)) {
  2840. return fastdecode_err(d);
  2841. }
  2842. return fastdecode_dispatch(d, ptr, msg, table, hasbits);
  2843. }
  2844. UPB_FORCEINLINE
  2845. static const char *fastdecode_longstring(struct upb_decstate *d,
  2846. const char *ptr, upb_msg *msg,
  2847. intptr_t table, uint64_t hasbits,
  2848. upb_strview *dst,
  2849. bool validate_utf8) {
  2850. int size = (uint8_t)ptr[0]; // Could plumb through hasbits.
  2851. ptr++;
  2852. if (size & 0x80) {
  2853. ptr = fastdecode_longsize(ptr, &size);
  2854. }
  2855. if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) {
  2856. dst->size = 0;
  2857. return fastdecode_err(d);
  2858. }
  2859. if (d->alias) {
  2860. dst->data = ptr;
  2861. dst->size = size;
  2862. } else {
  2863. char *data = upb_arena_malloc(&d->arena, size);
  2864. if (!data) {
  2865. return fastdecode_err(d);
  2866. }
  2867. memcpy(data, ptr, size);
  2868. dst->data = data;
  2869. dst->size = size;
  2870. }
  2871. if (validate_utf8) {
  2872. return fastdecode_verifyutf8(d, ptr + size, msg, table, hasbits, dst);
  2873. } else {
  2874. return fastdecode_dispatch(d, ptr + size, msg, table, hasbits);
  2875. }
  2876. }
  2877. UPB_NOINLINE
  2878. static const char *fastdecode_longstring_utf8(struct upb_decstate *d,
  2879. const char *ptr, upb_msg *msg,
  2880. intptr_t table, uint64_t hasbits,
  2881. upb_strview *dst) {
  2882. return fastdecode_longstring(d, ptr, msg, table, hasbits, dst, true);
  2883. }
  2884. UPB_NOINLINE
  2885. static const char *fastdecode_longstring_noutf8(struct upb_decstate *d,
  2886. const char *ptr, upb_msg *msg,
  2887. intptr_t table,
  2888. uint64_t hasbits,
  2889. upb_strview *dst) {
  2890. return fastdecode_longstring(d, ptr, msg, table, hasbits, dst, false);
  2891. }
  2892. UPB_FORCEINLINE
  2893. static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size,
  2894. int copy, char *data, upb_strview *dst) {
  2895. d->arena.head.ptr += copy;
  2896. dst->data = data;
  2897. UPB_UNPOISON_MEMORY_REGION(data, copy);
  2898. memcpy(data, ptr, copy);
  2899. UPB_POISON_MEMORY_REGION(data + size, copy - size);
  2900. }
  2901. UPB_FORCEINLINE
  2902. static const char *fastdecode_copystring(UPB_PARSE_PARAMS, int tagbytes,
  2903. upb_card card, bool validate_utf8) {
  2904. upb_strview *dst;
  2905. fastdecode_arr farr;
  2906. int64_t size;
  2907. size_t arena_has;
  2908. size_t common_has;
  2909. char *buf;
  2910. UPB_ASSERT(!d->alias);
  2911. UPB_ASSERT(fastdecode_checktag(data, tagbytes));
  2912. dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr,
  2913. sizeof(upb_strview), card);
  2914. again:
  2915. if (card == CARD_r) {
  2916. dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview));
  2917. }
  2918. size = (uint8_t)ptr[tagbytes];
  2919. ptr += tagbytes + 1;
  2920. dst->size = size;
  2921. buf = d->arena.head.ptr;
  2922. arena_has = _upb_arenahas(&d->arena);
  2923. common_has = UPB_MIN(arena_has, (d->end - ptr) + 16);
  2924. if (UPB_LIKELY(size <= 15 - tagbytes)) {
  2925. if (arena_has < 16) goto longstr;
  2926. d->arena.head.ptr += 16;
  2927. memcpy(buf, ptr - tagbytes - 1, 16);
  2928. dst->data = buf + tagbytes + 1;
  2929. } else if (UPB_LIKELY(size <= 32)) {
  2930. if (UPB_UNLIKELY(common_has < 32)) goto longstr;
  2931. fastdecode_docopy(d, ptr, size, 32, buf, dst);
  2932. } else if (UPB_LIKELY(size <= 64)) {
  2933. if (UPB_UNLIKELY(common_has < 64)) goto longstr;
  2934. fastdecode_docopy(d, ptr, size, 64, buf, dst);
  2935. } else if (UPB_LIKELY(size < 128)) {
  2936. if (UPB_UNLIKELY(common_has < 128)) goto longstr;
  2937. fastdecode_docopy(d, ptr, size, 128, buf, dst);
  2938. } else {
  2939. goto longstr;
  2940. }
  2941. ptr += size;
  2942. if (card == CARD_r) {
  2943. if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) {
  2944. return fastdecode_err(d);
  2945. }
  2946. fastdecode_nextret ret = fastdecode_nextrepeated(
  2947. d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview));
  2948. switch (ret.next) {
  2949. case FD_NEXT_SAMEFIELD:
  2950. dst = ret.dst;
  2951. goto again;
  2952. case FD_NEXT_OTHERFIELD:
  2953. return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag);
  2954. case FD_NEXT_ATLIMIT:
  2955. return ptr;
  2956. }
  2957. }
  2958. if (card != CARD_r && validate_utf8) {
  2959. return fastdecode_verifyutf8(d, ptr, msg, table, hasbits, dst);
  2960. }
  2961. return fastdecode_dispatch(d, ptr, msg, table, hasbits);
  2962. longstr:
  2963. ptr--;
  2964. if (validate_utf8) {
  2965. return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, dst);
  2966. } else {
  2967. return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, dst);
  2968. }
  2969. }
  2970. UPB_FORCEINLINE
  2971. static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes,
  2972. upb_card card, _upb_field_parser *copyfunc,
  2973. bool validate_utf8) {
  2974. upb_strview *dst;
  2975. fastdecode_arr farr;
  2976. int64_t size;
  2977. if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
  2978. RETURN_GENERIC("string field tag mismatch\n");
  2979. }
  2980. if (UPB_UNLIKELY(!d->alias)) {
  2981. return copyfunc(UPB_PARSE_ARGS);
  2982. }
  2983. dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr,
  2984. sizeof(upb_strview), card);
  2985. again:
  2986. if (card == CARD_r) {
  2987. dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview));
  2988. }
  2989. size = (int8_t)ptr[tagbytes];
  2990. ptr += tagbytes + 1;
  2991. dst->data = ptr;
  2992. dst->size = size;
  2993. if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) {
  2994. ptr--;
  2995. if (validate_utf8) {
  2996. return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, dst);
  2997. } else {
  2998. return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, dst);
  2999. }
  3000. }
  3001. ptr += size;
  3002. if (card == CARD_r) {
  3003. if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) {
  3004. return fastdecode_err(d);
  3005. }
  3006. fastdecode_nextret ret = fastdecode_nextrepeated(
  3007. d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview));
  3008. switch (ret.next) {
  3009. case FD_NEXT_SAMEFIELD:
  3010. dst = ret.dst;
  3011. if (UPB_UNLIKELY(!d->alias)) {
  3012. // Buffer flipped and we can't alias any more. Bounce to copyfunc(),
  3013. // but via dispatch since we need to reload table data also.
  3014. fastdecode_commitarr(dst, &farr, sizeof(upb_strview));
  3015. return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag);
  3016. }
  3017. goto again;
  3018. case FD_NEXT_OTHERFIELD:
  3019. return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag);
  3020. case FD_NEXT_ATLIMIT:
  3021. return ptr;
  3022. }
  3023. }
  3024. if (card != CARD_r && validate_utf8) {
  3025. return fastdecode_verifyutf8(d, ptr, msg, table, hasbits, dst);
  3026. }
  3027. return fastdecode_dispatch(d, ptr, msg, table, hasbits);
  3028. }
  3029. /* Generate all combinations:
  3030. * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */
  3031. #define s_VALIDATE true
  3032. #define b_VALIDATE false
  3033. #define F(card, tagbytes, type) \
  3034. UPB_NOINLINE \
  3035. const char *upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
  3036. return fastdecode_copystring(UPB_PARSE_ARGS, tagbytes, CARD_##card, \
  3037. type##_VALIDATE); \
  3038. } \
  3039. const char *upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
  3040. return fastdecode_string(UPB_PARSE_ARGS, tagbytes, CARD_##card, \
  3041. &upb_c##card##type##_##tagbytes##bt, \
  3042. type##_VALIDATE); \
  3043. }
  3044. #define UTF8(card, tagbytes) \
  3045. F(card, tagbytes, s) \
  3046. F(card, tagbytes, b)
  3047. #define TAGBYTES(card) \
  3048. UTF8(card, 1) \
  3049. UTF8(card, 2)
  3050. TAGBYTES(s)
  3051. TAGBYTES(o)
  3052. TAGBYTES(r)
  3053. #undef s_VALIDATE
  3054. #undef b_VALIDATE
  3055. #undef F
  3056. #undef TAGBYTES
  3057. /* message fields *************************************************************/
  3058. UPB_INLINE
  3059. upb_msg *decode_newmsg_ceil(upb_decstate *d, const upb_msglayout *l,
  3060. int msg_ceil_bytes) {
  3061. size_t size = l->size + sizeof(upb_msg_internal);
  3062. char *msg_data;
  3063. if (UPB_LIKELY(msg_ceil_bytes > 0 &&
  3064. _upb_arenahas(&d->arena) >= msg_ceil_bytes)) {
  3065. UPB_ASSERT(size <= (size_t)msg_ceil_bytes);
  3066. msg_data = d->arena.head.ptr;
  3067. d->arena.head.ptr += size;
  3068. UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes);
  3069. memset(msg_data, 0, msg_ceil_bytes);
  3070. UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size);
  3071. } else {
  3072. msg_data = (char*)upb_arena_malloc(&d->arena, size);
  3073. memset(msg_data, 0, size);
  3074. }
  3075. return msg_data + sizeof(upb_msg_internal);
  3076. }
  3077. typedef struct {
  3078. intptr_t table;
  3079. upb_msg *msg;
  3080. } fastdecode_submsgdata;
  3081. UPB_FORCEINLINE
  3082. static const char *fastdecode_tosubmsg(upb_decstate *d, const char *ptr,
  3083. void *ctx) {
  3084. fastdecode_submsgdata *submsg = ctx;
  3085. ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0);
  3086. UPB_ASSUME(ptr != NULL);
  3087. return ptr;
  3088. }
  3089. UPB_FORCEINLINE
  3090. static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes,
  3091. int msg_ceil_bytes, upb_card card) {
  3092. if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
  3093. RETURN_GENERIC("submessage field tag mismatch\n");
  3094. }
  3095. if (--d->depth == 0) return fastdecode_err(d);
  3096. upb_msg **dst;
  3097. uint32_t submsg_idx = (data >> 16) & 0xff;
  3098. const upb_msglayout *tablep = decode_totablep(table);
  3099. const upb_msglayout *subtablep = tablep->submsgs[submsg_idx];
  3100. fastdecode_submsgdata submsg = {decode_totable(subtablep)};
  3101. fastdecode_arr farr;
  3102. if (subtablep->table_mask == (uint8_t)-1) {
  3103. RETURN_GENERIC("submessage doesn't have fast tables.");
  3104. }
  3105. dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr,
  3106. sizeof(upb_msg *), card);
  3107. if (card == CARD_s) {
  3108. *(uint32_t*)msg |= hasbits;
  3109. hasbits = 0;
  3110. }
  3111. again:
  3112. if (card == CARD_r) {
  3113. dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_msg*));
  3114. }
  3115. submsg.msg = *dst;
  3116. if (card == CARD_r || UPB_LIKELY(!submsg.msg)) {
  3117. *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes);
  3118. }
  3119. ptr += tagbytes;
  3120. ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg);
  3121. if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) {
  3122. return fastdecode_err(d);
  3123. }
  3124. if (card == CARD_r) {
  3125. fastdecode_nextret ret = fastdecode_nextrepeated(
  3126. d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_msg *));
  3127. switch (ret.next) {
  3128. case FD_NEXT_SAMEFIELD:
  3129. dst = ret.dst;
  3130. goto again;
  3131. case FD_NEXT_OTHERFIELD:
  3132. d->depth++;
  3133. return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag);
  3134. case FD_NEXT_ATLIMIT:
  3135. d->depth++;
  3136. return ptr;
  3137. }
  3138. }
  3139. d->depth++;
  3140. return fastdecode_dispatch(d, ptr, msg, table, hasbits);
  3141. }
  3142. #define F(card, tagbytes, size_ceil, ceil_arg) \
  3143. const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \
  3144. UPB_PARSE_PARAMS) { \
  3145. return fastdecode_submsg(UPB_PARSE_ARGS, tagbytes, ceil_arg, CARD_##card); \
  3146. }
  3147. #define SIZES(card, tagbytes) \
  3148. F(card, tagbytes, 64, 64) \
  3149. F(card, tagbytes, 128, 128) \
  3150. F(card, tagbytes, 192, 192) \
  3151. F(card, tagbytes, 256, 256) \
  3152. F(card, tagbytes, max, -1)
  3153. #define TAGBYTES(card) \
  3154. SIZES(card, 1) \
  3155. SIZES(card, 2)
  3156. TAGBYTES(s)
  3157. TAGBYTES(o)
  3158. TAGBYTES(r)
  3159. #undef TAGBYTES
  3160. #undef SIZES
  3161. #undef F
  3162. #endif /* UPB_FASTTABLE */
  /* This file was generated by upbc (the upb compiler) from the input
   * file:
   *
   *     google/protobuf/descriptor.proto
   *
   * Do not edit -- your changes will be discarded when the file is
   * regenerated. */
  3170. #include <stddef.h>
  3171. static const upb_msglayout *const google_protobuf_FileDescriptorSet_submsgs[1] = {
  3172. &google_protobuf_FileDescriptorProto_msginit,
  3173. };
  3174. static const upb_msglayout_field google_protobuf_FileDescriptorSet__fields[1] = {
  3175. {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
  3176. };
  3177. const upb_msglayout google_protobuf_FileDescriptorSet_msginit = {
  3178. &google_protobuf_FileDescriptorSet_submsgs[0],
  3179. &google_protobuf_FileDescriptorSet__fields[0],
  3180. UPB_SIZE(8, 8), 1, false, 255,
  3181. };
  3182. static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = {
  3183. &google_protobuf_DescriptorProto_msginit,
  3184. &google_protobuf_EnumDescriptorProto_msginit,
  3185. &google_protobuf_FieldDescriptorProto_msginit,
  3186. &google_protobuf_FileOptions_msginit,
  3187. &google_protobuf_ServiceDescriptorProto_msginit,
  3188. &google_protobuf_SourceCodeInfo_msginit,
  3189. };
  3190. static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = {
  3191. {1, UPB_SIZE(4, 8), 1, 0, 12, 1},
  3192. {2, UPB_SIZE(12, 24), 2, 0, 12, 1},
  3193. {3, UPB_SIZE(36, 72), 0, 0, 12, 3},
  3194. {4, UPB_SIZE(40, 80), 0, 0, 11, 3},
  3195. {5, UPB_SIZE(44, 88), 0, 1, 11, 3},
  3196. {6, UPB_SIZE(48, 96), 0, 4, 11, 3},
  3197. {7, UPB_SIZE(52, 104), 0, 2, 11, 3},
  3198. {8, UPB_SIZE(28, 56), 3, 3, 11, 1},
  3199. {9, UPB_SIZE(32, 64), 4, 5, 11, 1},
  3200. {10, UPB_SIZE(56, 112), 0, 0, 5, 3},
  3201. {11, UPB_SIZE(60, 120), 0, 0, 5, 3},
  3202. {12, UPB_SIZE(20, 40), 5, 0, 12, 1},
  3203. };
  3204. const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
  3205. &google_protobuf_FileDescriptorProto_submsgs[0],
  3206. &google_protobuf_FileDescriptorProto__fields[0],
  3207. UPB_SIZE(64, 128), 12, false, 255,
  3208. };
  3209. static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[7] = {
  3210. &google_protobuf_DescriptorProto_msginit,
  3211. &google_protobuf_DescriptorProto_ExtensionRange_msginit,
  3212. &google_protobuf_DescriptorProto_ReservedRange_msginit,
  3213. &google_protobuf_EnumDescriptorProto_msginit,
  3214. &google_protobuf_FieldDescriptorProto_msginit,
  3215. &google_protobuf_MessageOptions_msginit,
  3216. &google_protobuf_OneofDescriptorProto_msginit,
  3217. };
  3218. static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = {
  3219. {1, UPB_SIZE(4, 8), 1, 0, 12, 1},
  3220. {2, UPB_SIZE(16, 32), 0, 4, 11, 3},
  3221. {3, UPB_SIZE(20, 40), 0, 0, 11, 3},
  3222. {4, UPB_SIZE(24, 48), 0, 3, 11, 3},
  3223. {5, UPB_SIZE(28, 56), 0, 1, 11, 3},
  3224. {6, UPB_SIZE(32, 64), 0, 4, 11, 3},
  3225. {7, UPB_SIZE(12, 24), 2, 5, 11, 1},
  3226. {8, UPB_SIZE(36, 72), 0, 6, 11, 3},
  3227. {9, UPB_SIZE(40, 80), 0, 2, 11, 3},
  3228. {10, UPB_SIZE(44, 88), 0, 0, 12, 3},
  3229. };
  3230. const upb_msglayout google_protobuf_DescriptorProto_msginit = {
  3231. &google_protobuf_DescriptorProto_submsgs[0],
  3232. &google_protobuf_DescriptorProto__fields[0],
  3233. UPB_SIZE(48, 96), 10, false, 255,
  3234. };
  3235. static const upb_msglayout *const google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = {
  3236. &google_protobuf_ExtensionRangeOptions_msginit,
  3237. };
  3238. static const upb_msglayout_field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = {
  3239. {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
  3240. {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
  3241. {3, UPB_SIZE(12, 16), 3, 0, 11, 1},
  3242. };
  3243. const upb_msglayout google_protobuf_DescriptorProto_ExtensionRange_msginit = {
  3244. &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0],
  3245. &google_protobuf_DescriptorProto_ExtensionRange__fields[0],
  3246. UPB_SIZE(16, 24), 3, false, 255,
  3247. };
  3248. static const upb_msglayout_field google_protobuf_DescriptorProto_ReservedRange__fields[2] = {
  3249. {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
  3250. {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
  3251. };
  3252. const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = {
  3253. NULL,
  3254. &google_protobuf_DescriptorProto_ReservedRange__fields[0],
  3255. UPB_SIZE(16, 16), 2, false, 255,
  3256. };
  3257. static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = {
  3258. &google_protobuf_UninterpretedOption_msginit,
  3259. };
  3260. static const upb_msglayout_field google_protobuf_ExtensionRangeOptions__fields[1] = {
  3261. {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
  3262. };
  3263. const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = {
  3264. &google_protobuf_ExtensionRangeOptions_submsgs[0],
  3265. &google_protobuf_ExtensionRangeOptions__fields[0],
  3266. UPB_SIZE(8, 8), 1, false, 255,
  3267. };
  3268. static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = {
  3269. &google_protobuf_FieldOptions_msginit,
  3270. };
  3271. static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[11] = {
  3272. {1, UPB_SIZE(24, 24), 1, 0, 12, 1},
  3273. {2, UPB_SIZE(32, 40), 2, 0, 12, 1},
  3274. {3, UPB_SIZE(12, 12), 3, 0, 5, 1},
  3275. {4, UPB_SIZE(4, 4), 4, 0, 14, 1},
  3276. {5, UPB_SIZE(8, 8), 5, 0, 14, 1},
  3277. {6, UPB_SIZE(40, 56), 6, 0, 12, 1},
  3278. {7, UPB_SIZE(48, 72), 7, 0, 12, 1},
  3279. {8, UPB_SIZE(64, 104), 8, 0, 11, 1},
  3280. {9, UPB_SIZE(16, 16), 9, 0, 5, 1},
  3281. {10, UPB_SIZE(56, 88), 10, 0, 12, 1},
  3282. {17, UPB_SIZE(20, 20), 11, 0, 8, 1},
  3283. };
  3284. const upb_msglayout google_protobuf_FieldDescriptorProto_msginit = {
  3285. &google_protobuf_FieldDescriptorProto_submsgs[0],
  3286. &google_protobuf_FieldDescriptorProto__fields[0],
  3287. UPB_SIZE(72, 112), 11, false, 255,
  3288. };
  /* Generated layout data for google_protobuf_OneofDescriptorProto: one
   * sub-message layout (OneofOptions), a 2-entry field table, and the msginit
   * record that references both. */
  3289. static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1] = {
  3290. &google_protobuf_OneofOptions_msginit,
  3291. };
  3292. static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = {
  3293. {1, UPB_SIZE(4, 8), 1, 0, 12, 1},
  3294. {2, UPB_SIZE(12, 24), 2, 0, 11, 1},
  3295. };
  3296. const upb_msglayout google_protobuf_OneofDescriptorProto_msginit = {
  3297. &google_protobuf_OneofDescriptorProto_submsgs[0],
  3298. &google_protobuf_OneofDescriptorProto__fields[0],
  3299. UPB_SIZE(16, 32), 2, false, 255,
  3300. };
  /* Generated layout data for google_protobuf_EnumDescriptorProto: three
   * sub-message layouts (EnumReservedRange, EnumOptions,
   * EnumValueDescriptorProto, in that array order), a 5-entry field table, and
   * the msginit record that references both. */
  3301. static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3] = {
  3302. &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit,
  3303. &google_protobuf_EnumOptions_msginit,
  3304. &google_protobuf_EnumValueDescriptorProto_msginit,
  3305. };
  3306. static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = {
  3307. {1, UPB_SIZE(4, 8), 1, 0, 12, 1},
  3308. {2, UPB_SIZE(16, 32), 0, 2, 11, 3},
  3309. {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
  3310. {4, UPB_SIZE(20, 40), 0, 0, 11, 3},
  3311. {5, UPB_SIZE(24, 48), 0, 0, 12, 3},
  3312. };
  3313. const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = {
  3314. &google_protobuf_EnumDescriptorProto_submsgs[0],
  3315. &google_protobuf_EnumDescriptorProto__fields[0],
  3316. UPB_SIZE(32, 64), 5, false, 255,
  3317. };
  /* Generated layout data for
   * google_protobuf_EnumDescriptorProto_EnumReservedRange: a 2-entry field table
   * and its msginit record.  There is no sub-message array (NULL). */
  3318. static const upb_msglayout_field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = {
  3319. {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
  3320. {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
  3321. };
  3322. const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = {
  3323. NULL,
  3324. &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
  3325. UPB_SIZE(16, 16), 2, false, 255,
  3326. };
  /* Generated layout data for google_protobuf_EnumValueDescriptorProto: one
   * sub-message layout (EnumValueOptions), a 3-entry field table, and the
   * msginit record that references both. */
  3327. static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
  3328. &google_protobuf_EnumValueOptions_msginit,
  3329. };
  3330. static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = {
  3331. {1, UPB_SIZE(8, 8), 1, 0, 12, 1},
  3332. {2, UPB_SIZE(4, 4), 2, 0, 5, 1},
  3333. {3, UPB_SIZE(16, 24), 3, 0, 11, 1},
  3334. };
  3335. const upb_msglayout google_protobuf_EnumValueDescriptorProto_msginit = {
  3336. &google_protobuf_EnumValueDescriptorProto_submsgs[0],
  3337. &google_protobuf_EnumValueDescriptorProto__fields[0],
  3338. UPB_SIZE(24, 32), 3, false, 255,
  3339. };
  /* Generated layout data for google_protobuf_ServiceDescriptorProto: two
   * sub-message layouts (MethodDescriptorProto, ServiceOptions, in that array
   * order), a 3-entry field table, and the msginit record that references both. */
  3340. static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs[2] = {
  3341. &google_protobuf_MethodDescriptorProto_msginit,
  3342. &google_protobuf_ServiceOptions_msginit,
  3343. };
  3344. static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = {
  3345. {1, UPB_SIZE(4, 8), 1, 0, 12, 1},
  3346. {2, UPB_SIZE(16, 32), 0, 0, 11, 3},
  3347. {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
  3348. };
  3349. const upb_msglayout google_protobuf_ServiceDescriptorProto_msginit = {
  3350. &google_protobuf_ServiceDescriptorProto_submsgs[0],
  3351. &google_protobuf_ServiceDescriptorProto__fields[0],
  3352. UPB_SIZE(24, 48), 3, false, 255,
  3353. };
  /* Generated layout data for google_protobuf_MethodDescriptorProto: one
   * sub-message layout (MethodOptions), a 6-entry field table, and the msginit
   * record that references both. */
  3354. static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[1] = {
  3355. &google_protobuf_MethodOptions_msginit,
  3356. };
  3357. static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = {
  3358. {1, UPB_SIZE(4, 8), 1, 0, 12, 1},
  3359. {2, UPB_SIZE(12, 24), 2, 0, 12, 1},
  3360. {3, UPB_SIZE(20, 40), 3, 0, 12, 1},
  3361. {4, UPB_SIZE(28, 56), 4, 0, 11, 1},
  3362. {5, UPB_SIZE(1, 1), 5, 0, 8, 1},
  3363. {6, UPB_SIZE(2, 2), 6, 0, 8, 1},
  3364. };
  3365. const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = {
  3366. &google_protobuf_MethodDescriptorProto_submsgs[0],
  3367. &google_protobuf_MethodDescriptorProto__fields[0],
  3368. UPB_SIZE(32, 64), 6, false, 255,
  3369. };
  /* Generated layout data for google_protobuf_FileOptions: one sub-message
   * layout (UninterpretedOption), a 21-entry field table, and the msginit
   * record that references both. */
  3370. static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = {
  3371. &google_protobuf_UninterpretedOption_msginit,
  3372. };
  3373. static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = {
  3374. {1, UPB_SIZE(20, 24), 1, 0, 12, 1},
  3375. {8, UPB_SIZE(28, 40), 2, 0, 12, 1},
  3376. {9, UPB_SIZE(4, 4), 3, 0, 14, 1},
  3377. {10, UPB_SIZE(8, 8), 4, 0, 8, 1},
  3378. {11, UPB_SIZE(36, 56), 5, 0, 12, 1},
  3379. {16, UPB_SIZE(9, 9), 6, 0, 8, 1},
  3380. {17, UPB_SIZE(10, 10), 7, 0, 8, 1},
  3381. {18, UPB_SIZE(11, 11), 8, 0, 8, 1},
  3382. {20, UPB_SIZE(12, 12), 9, 0, 8, 1},
  3383. {23, UPB_SIZE(13, 13), 10, 0, 8, 1},
  3384. {27, UPB_SIZE(14, 14), 11, 0, 8, 1},
  3385. {31, UPB_SIZE(15, 15), 12, 0, 8, 1},
  3386. {36, UPB_SIZE(44, 72), 13, 0, 12, 1},
  3387. {37, UPB_SIZE(52, 88), 14, 0, 12, 1},
  3388. {39, UPB_SIZE(60, 104), 15, 0, 12, 1},
  3389. {40, UPB_SIZE(68, 120), 16, 0, 12, 1},
  3390. {41, UPB_SIZE(76, 136), 17, 0, 12, 1},
  3391. {42, UPB_SIZE(16, 16), 18, 0, 8, 1},
  3392. {44, UPB_SIZE(84, 152), 19, 0, 12, 1},
  3393. {45, UPB_SIZE(92, 168), 20, 0, 12, 1},
  3394. {999, UPB_SIZE(100, 184), 0, 0, 11, 3},
  3395. };
  3396. const upb_msglayout google_protobuf_FileOptions_msginit = {
  3397. &google_protobuf_FileOptions_submsgs[0],
  3398. &google_protobuf_FileOptions__fields[0],
  3399. UPB_SIZE(104, 192), 21, false, 255,
  3400. };
  /* Generated layout data for google_protobuf_MessageOptions: one sub-message
   * layout (UninterpretedOption), a 5-entry field table, and the msginit record
   * that references both. */
  3401. static const upb_msglayout *const google_protobuf_MessageOptions_submsgs[1] = {
  3402. &google_protobuf_UninterpretedOption_msginit,
  3403. };
  3404. static const upb_msglayout_field google_protobuf_MessageOptions__fields[5] = {
  3405. {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
  3406. {2, UPB_SIZE(2, 2), 2, 0, 8, 1},
  3407. {3, UPB_SIZE(3, 3), 3, 0, 8, 1},
  3408. {7, UPB_SIZE(4, 4), 4, 0, 8, 1},
  3409. {999, UPB_SIZE(8, 8), 0, 0, 11, 3},
  3410. };
  3411. const upb_msglayout google_protobuf_MessageOptions_msginit = {
  3412. &google_protobuf_MessageOptions_submsgs[0],
  3413. &google_protobuf_MessageOptions__fields[0],
  3414. UPB_SIZE(16, 16), 5, false, 255,
  3415. };
  /* Generated layout data for google_protobuf_FieldOptions: one sub-message
   * layout (UninterpretedOption), a 7-entry field table, and the msginit record
   * that references both. */
  3416. static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = {
  3417. &google_protobuf_UninterpretedOption_msginit,
  3418. };
  3419. static const upb_msglayout_field google_protobuf_FieldOptions__fields[7] = {
  3420. {1, UPB_SIZE(4, 4), 1, 0, 14, 1},
  3421. {2, UPB_SIZE(12, 12), 2, 0, 8, 1},
  3422. {3, UPB_SIZE(13, 13), 3, 0, 8, 1},
  3423. {5, UPB_SIZE(14, 14), 4, 0, 8, 1},
  3424. {6, UPB_SIZE(8, 8), 5, 0, 14, 1},
  3425. {10, UPB_SIZE(15, 15), 6, 0, 8, 1},
  3426. {999, UPB_SIZE(16, 16), 0, 0, 11, 3},
  3427. };
  3428. const upb_msglayout google_protobuf_FieldOptions_msginit = {
  3429. &google_protobuf_FieldOptions_submsgs[0],
  3430. &google_protobuf_FieldOptions__fields[0],
  3431. UPB_SIZE(24, 24), 7, false, 255,
  3432. };
  /* Generated layout data for google_protobuf_OneofOptions: one sub-message
   * layout (UninterpretedOption), a 1-entry field table, and the msginit record
   * that references both. */
  3433. static const upb_msglayout *const google_protobuf_OneofOptions_submsgs[1] = {
  3434. &google_protobuf_UninterpretedOption_msginit,
  3435. };
  3436. static const upb_msglayout_field google_protobuf_OneofOptions__fields[1] = {
  3437. {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
  3438. };
  3439. const upb_msglayout google_protobuf_OneofOptions_msginit = {
  3440. &google_protobuf_OneofOptions_submsgs[0],
  3441. &google_protobuf_OneofOptions__fields[0],
  3442. UPB_SIZE(8, 8), 1, false, 255,
  3443. };
  /* Generated layout data for google_protobuf_EnumOptions: one sub-message
   * layout (UninterpretedOption), a 3-entry field table, and the msginit record
   * that references both. */
  3444. static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = {
  3445. &google_protobuf_UninterpretedOption_msginit,
  3446. };
  3447. static const upb_msglayout_field google_protobuf_EnumOptions__fields[3] = {
  3448. {2, UPB_SIZE(1, 1), 1, 0, 8, 1},
  3449. {3, UPB_SIZE(2, 2), 2, 0, 8, 1},
  3450. {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
  3451. };
  3452. const upb_msglayout google_protobuf_EnumOptions_msginit = {
  3453. &google_protobuf_EnumOptions_submsgs[0],
  3454. &google_protobuf_EnumOptions__fields[0],
  3455. UPB_SIZE(8, 16), 3, false, 255,
  3456. };
  /* Generated layout data for google_protobuf_EnumValueOptions: one sub-message
   * layout (UninterpretedOption), a 2-entry field table, and the msginit record
   * that references both. */
  3457. static const upb_msglayout *const google_protobuf_EnumValueOptions_submsgs[1] = {
  3458. &google_protobuf_UninterpretedOption_msginit,
  3459. };
  3460. static const upb_msglayout_field google_protobuf_EnumValueOptions__fields[2] = {
  3461. {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
  3462. {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
  3463. };
  3464. const upb_msglayout google_protobuf_EnumValueOptions_msginit = {
  3465. &google_protobuf_EnumValueOptions_submsgs[0],
  3466. &google_protobuf_EnumValueOptions__fields[0],
  3467. UPB_SIZE(8, 16), 2, false, 255,
  3468. };
  /* Generated layout data for google_protobuf_ServiceOptions: one sub-message
   * layout (UninterpretedOption), a 2-entry field table, and the msginit record
   * that references both. */
  3469. static const upb_msglayout *const google_protobuf_ServiceOptions_submsgs[1] = {
  3470. &google_protobuf_UninterpretedOption_msginit,
  3471. };
  3472. static const upb_msglayout_field google_protobuf_ServiceOptions__fields[2] = {
  3473. {33, UPB_SIZE(1, 1), 1, 0, 8, 1},
  3474. {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
  3475. };
  3476. const upb_msglayout google_protobuf_ServiceOptions_msginit = {
  3477. &google_protobuf_ServiceOptions_submsgs[0],
  3478. &google_protobuf_ServiceOptions__fields[0],
  3479. UPB_SIZE(8, 16), 2, false, 255,
  3480. };
  /* Generated layout data for google_protobuf_MethodOptions: one sub-message
   * layout (UninterpretedOption), a 3-entry field table, and the msginit record
   * that references both. */
  3481. static const upb_msglayout *const google_protobuf_MethodOptions_submsgs[1] = {
  3482. &google_protobuf_UninterpretedOption_msginit,
  3483. };
  3484. static const upb_msglayout_field google_protobuf_MethodOptions__fields[3] = {
  3485. {33, UPB_SIZE(8, 8), 1, 0, 8, 1},
  3486. {34, UPB_SIZE(4, 4), 2, 0, 14, 1},
  3487. {999, UPB_SIZE(12, 16), 0, 0, 11, 3},
  3488. };
  3489. const upb_msglayout google_protobuf_MethodOptions_msginit = {
  3490. &google_protobuf_MethodOptions_submsgs[0],
  3491. &google_protobuf_MethodOptions__fields[0],
  3492. UPB_SIZE(16, 24), 3, false, 255,
  3493. };
  /* Generated layout data for google_protobuf_UninterpretedOption: one
   * sub-message layout (UninterpretedOption_NamePart), a 7-entry field table,
   * and the msginit record that references both. */
  3494. static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1] = {
  3495. &google_protobuf_UninterpretedOption_NamePart_msginit,
  3496. };
  3497. static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = {
  3498. {2, UPB_SIZE(56, 80), 0, 0, 11, 3},
  3499. {3, UPB_SIZE(32, 32), 1, 0, 12, 1},
  3500. {4, UPB_SIZE(8, 8), 2, 0, 4, 1},
  3501. {5, UPB_SIZE(16, 16), 3, 0, 3, 1},
  3502. {6, UPB_SIZE(24, 24), 4, 0, 1, 1},
  3503. {7, UPB_SIZE(40, 48), 5, 0, 12, 1},
  3504. {8, UPB_SIZE(48, 64), 6, 0, 12, 1},
  3505. };
  3506. const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
  3507. &google_protobuf_UninterpretedOption_submsgs[0],
  3508. &google_protobuf_UninterpretedOption__fields[0],
  3509. UPB_SIZE(64, 96), 7, false, 255,
  3510. };
  /* Generated layout data for google_protobuf_UninterpretedOption_NamePart: a
   * 2-entry field table and its msginit record.  There is no sub-message array
   * (NULL).  Both entries end in 2 rather than the 1 or 3 seen in most sibling
   * tables. */
  3511. static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
  3512. {1, UPB_SIZE(4, 8), 1, 0, 12, 2},
  3513. {2, UPB_SIZE(1, 1), 2, 0, 8, 2},
  3514. };
  3515. const upb_msglayout google_protobuf_UninterpretedOption_NamePart_msginit = {
  3516. NULL,
  3517. &google_protobuf_UninterpretedOption_NamePart__fields[0],
  3518. UPB_SIZE(16, 32), 2, false, 255,
  3519. };
  /* Generated layout data for google_protobuf_SourceCodeInfo: one sub-message
   * layout (SourceCodeInfo_Location), a 1-entry field table, and the msginit
   * record that references both. */
  3520. static const upb_msglayout *const google_protobuf_SourceCodeInfo_submsgs[1] = {
  3521. &google_protobuf_SourceCodeInfo_Location_msginit,
  3522. };
  3523. static const upb_msglayout_field google_protobuf_SourceCodeInfo__fields[1] = {
  3524. {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
  3525. };
  3526. const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
  3527. &google_protobuf_SourceCodeInfo_submsgs[0],
  3528. &google_protobuf_SourceCodeInfo__fields[0],
  3529. UPB_SIZE(8, 8), 1, false, 255,
  3530. };
  /* Generated layout data for google_protobuf_SourceCodeInfo_Location: a
   * 5-entry field table and its msginit record.  There is no sub-message array
   * (NULL).  The first two entries use _UPB_LABEL_PACKED as their last element. */
  3531. static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
  3532. {1, UPB_SIZE(20, 40), 0, 0, 5, _UPB_LABEL_PACKED},
  3533. {2, UPB_SIZE(24, 48), 0, 0, 5, _UPB_LABEL_PACKED},
  3534. {3, UPB_SIZE(4, 8), 1, 0, 12, 1},
  3535. {4, UPB_SIZE(12, 24), 2, 0, 12, 1},
  3536. {6, UPB_SIZE(28, 56), 0, 0, 12, 3},
  3537. };
  3538. const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = {
  3539. NULL,
  3540. &google_protobuf_SourceCodeInfo_Location__fields[0],
  3541. UPB_SIZE(32, 64), 5, false, 255,
  3542. };
  /* Generated layout data for google_protobuf_GeneratedCodeInfo: one
   * sub-message layout (GeneratedCodeInfo_Annotation), a 1-entry field table,
   * and the msginit record that references both. */
  3543. static const upb_msglayout *const google_protobuf_GeneratedCodeInfo_submsgs[1] = {
  3544. &google_protobuf_GeneratedCodeInfo_Annotation_msginit,
  3545. };
  3546. static const upb_msglayout_field google_protobuf_GeneratedCodeInfo__fields[1] = {
  3547. {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
  3548. };
  3549. const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
  3550. &google_protobuf_GeneratedCodeInfo_submsgs[0],
  3551. &google_protobuf_GeneratedCodeInfo__fields[0],
  3552. UPB_SIZE(8, 8), 1, false, 255,
  3553. };
  /* Generated layout data for google_protobuf_GeneratedCodeInfo_Annotation: a
   * 4-entry field table and its msginit record.  There is no sub-message array
   * (NULL).  The first entry uses _UPB_LABEL_PACKED as its last element. */
  3554. static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
  3555. {1, UPB_SIZE(20, 32), 0, 0, 5, _UPB_LABEL_PACKED},
  3556. {2, UPB_SIZE(12, 16), 1, 0, 12, 1},
  3557. {3, UPB_SIZE(4, 4), 2, 0, 5, 1},
  3558. {4, UPB_SIZE(8, 8), 3, 0, 5, 1},
  3559. };
  3560. const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = {
  3561. NULL,
  3562. &google_protobuf_GeneratedCodeInfo_Annotation__fields[0],
  3563. UPB_SIZE(24, 48), 4, false, 255,
  3564. };
  3565. #include <ctype.h>
  3566. #include <errno.h>
  3567. #include <setjmp.h>
  3568. #include <stdlib.h>
  3569. #include <string.h>
  3570. /* Must be last. */
  /* Length-prefixed string used for string/bytes default values
   * (upb_fielddef.defaultval.str).  `str` is declared with one element; per the
   * inline note the actual character data extends past the end of the struct.
   * NOTE(review): allocation sites are outside this chunk -- confirm how the
   * trailing storage is sized before changing this declaration. */
  3571. typedef struct {
  3572. size_t len;
  3573. char str[1]; /* Null-terminated string data follows. */
  3574. } str_t;
  /* Definition record for a single field.  The upb_fielddef_* accessors in this
   * file read these members directly. */
  3575. struct upb_fielddef {
  3576. const upb_filedef *file;
  3577. const upb_msgdef *msgdef;
  3578. const char *full_name;
  3579. const char *json_name;
  /* Default value; which member is meaningful depends on the field's type
   * (see the upb_fielddef_default* getters). */
  3580. union {
  3581. int64_t sint;
  3582. uint64_t uint;
  3583. double dbl;
  3584. float flt;
  3585. bool boolean;
  3586. str_t *str;
  3587. } defaultval;
  3588. const upb_oneofdef *oneof;
  /* Sub-definition: msgdef is returned for message-typed fields and enumdef
   * for enum-typed fields (upb_fielddef_msgsubdef / upb_fielddef_enumsubdef).
   * NOTE(review): `unresolved` is presumably a placeholder used while the
   * file is being built -- confirm against the builder code. */
  3589. union {
  3590. const upb_msgdef *msgdef;
  3591. const upb_enumdef *enumdef;
  3592. const google_protobuf_FieldDescriptorProto *unresolved;
  3593. } sub;
  3594. uint32_t number_;
  3595. uint16_t index_;
  /* Index into msgdef->layout->fields (see upb_fielddef_layout). */
  3596. uint16_t layout_index;
  3597. uint32_t selector_base; /* Used to index into a upb::Handlers table. */
  3598. bool is_extension_;
  3599. bool lazy_;
  3600. bool packed_;
  3601. bool proto3_optional_;
  3602. upb_descriptortype_t type_;
  3603. upb_label_t label_;
  3604. };
  /* Definition record for a message type.  The upb_msgdef_* accessors in this
   * file read these members directly. */
  3605. struct upb_msgdef {
  3606. const upb_msglayout *layout;
  3607. const upb_filedef *file;
  3608. const char *full_name;
  3609. uint32_t selector_count;
  3610. uint32_t submsg_field_count;
  3611. /* Tables for looking up fields by number and name. */
  3612. upb_inttable itof;
  /* ntof values are tagged pointers (see unpack_def): the lookups in this
   * file decode them as fields, oneofs, or JSON-name field aliases. */
  3613. upb_strtable ntof;
  /* Arrays indexed by upb_msgdef_field() / upb_msgdef_oneof(), bounded by
   * field_count / oneof_count. */
  3614. const upb_fielddef *fields;
  3615. const upb_oneofdef *oneofs;
  3616. int field_count;
  3617. int oneof_count;
  3618. int real_oneof_count;
  3619. /* Is this a map-entry message? */
  3620. bool map_entry;
  /* Set by assign_msg_wellknowntype() from full_name. */
  3621. upb_wellknowntype_t well_known_type;
  3622. /* TODO(haberman): proper extension ranges (there can be multiple). */
  3623. };
  /* Definition record for an enum type.  ntoi is the name -> number table and
   * iton the number -> name table used by upb_enumdef_ntoi() and
   * upb_enumdef_iton(); defaultval is returned by upb_enumdef_default(). */
  3624. struct upb_enumdef {
  3625. const upb_filedef *file;
  3626. const char *full_name;
  3627. upb_strtable ntoi;
  3628. upb_inttable iton;
  3629. int32_t defaultval;
  3630. };
  /* Definition record for a oneof.  `synthetic` is what
   * upb_fielddef_realcontainingoneof() consults (through
   * upb_oneofdef_issynthetic) to hide a oneof from callers.
   * NOTE(review): ntof/itof presumably index the member fields by name and
   * number, mirroring upb_msgdef -- confirm against the oneof accessors. */
  3631. struct upb_oneofdef {
  3632. const upb_msgdef *parent;
  3633. const char *full_name;
  3634. int field_count;
  3635. bool synthetic;
  3636. const upb_fielddef **fields;
  3637. upb_strtable ntof;
  3638. upb_inttable itof;
  3639. };
  /* Definition record for a .proto file: its identifying strings, the arrays
   * of dependencies / messages / enums / extensions with their counts, the
   * owning symtab, and the syntax level that upb_msgdef_syntax() and
   * upb_fielddef_haspresence() read. */
  3640. struct upb_filedef {
  3641. const char *name;
  3642. const char *package;
  3643. const char *phpprefix;
  3644. const char *phpnamespace;
  3645. const upb_filedef **deps;
  3646. const upb_msgdef *msgs;
  3647. const upb_enumdef *enums;
  3648. const upb_fielddef *exts;
  3649. const upb_symtab *symtab;
  3650. int dep_count;
  3651. int msg_count;
  3652. int enum_count;
  3653. int ext_count;
  3654. upb_syntax_t syntax;
  3655. };
  /* Symbol table: an arena plus two string-keyed tables (definitions by full
   * name, files by file name) and a running byte counter.
   * NOTE(review): the arena presumably owns every def reachable from the
   * tables -- confirm against the symtab constructor/destructor. */
  3656. struct upb_symtab {
  3657. upb_arena *arena;
  3658. upb_strtable syms; /* full_name -> packed def ptr */
  3659. upb_strtable files; /* file_name -> upb_filedef* */
  3660. size_t bytes_loaded;
  3661. };
  /* Tag values that pack_def() ORs into the low two bits of a def pointer and
   * unpack_def() checks on the way out.  The numeric values 1 and 2 are
   * deliberately reused: their meaning depends on which table the tagged
   * pointer lives in (symtab table vs. a message's name table). */
  3662. /* Inside a symtab we store tagged pointers to specific def types. */
  3663. typedef enum {
  3664. UPB_DEFTYPE_FIELD = 0,
  3665. /* Only inside symtab table. */
  3666. UPB_DEFTYPE_MSG = 1,
  3667. UPB_DEFTYPE_ENUM = 2,
  3668. /* Only inside message table. */
  3669. UPB_DEFTYPE_ONEOF = 1,
  3670. UPB_DEFTYPE_FIELD_JSONNAME = 2
  3671. } upb_deftype_t;
  3672. static const void *unpack_def(upb_value v, upb_deftype_t type) {
  3673. uintptr_t num = (uintptr_t)upb_value_getconstptr(v);
  3674. return (num & 3) == type ? (const void*)(num & ~3) : NULL;
  3675. }
  3676. static upb_value pack_def(const void *ptr, upb_deftype_t type) {
  3677. uintptr_t num = (uintptr_t)ptr | type;
  3678. return upb_value_constptr((const void*)num);
  3679. }
  /* Locale-independent replacement for the <ctype.h> classifiers (which depend
   * on the current locale): true when low <= c <= high. */
  static bool upb_isbetween(char c, char low, char high) {
    if (c < low) return false;
    return c <= high;
  }
  /* True for ASCII letters and underscore. */
  static bool upb_isletter(char c) {
    if (c == '_') return true;
    if (upb_isbetween(c, 'A', 'Z')) return true;
    return upb_isbetween(c, 'a', 'z');
  }
  /* True for ASCII letters, underscore, and decimal digits. */
  static bool upb_isalphanum(char c) {
    if (upb_isbetween(c, '0', '9')) return true;
    return upb_isletter(c);
  }
  /* Returns the component of `fullname` after its last '.', the whole string
   * when it contains no '.', or NULL when `fullname` is NULL.  The result
   * points into `fullname`; nothing is copied. */
  static const char *shortdefname(const char *fullname) {
    const char *last_dot;
    if (!fullname) return NULL;
    last_dot = strrchr(fullname, '.');
    return last_dot ? last_dot + 1 : fullname;
  }
  3702. /* All submessage fields are lower than all other fields.
  3703. * Secondly, fields are increasing in order. */
  3704. uint32_t field_rank(const upb_fielddef *f) {
  3705. uint32_t ret = upb_fielddef_number(f);
  3706. const uint32_t high_bit = 1 << 30;
  3707. UPB_ASSERT(ret < high_bit);
  3708. if (!upb_fielddef_issubmsg(f))
  3709. ret |= high_bit;
  3710. return ret;
  3711. }
  3712. int cmp_fields(const void *p1, const void *p2) {
  3713. const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
  3714. const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
  3715. return field_rank(f1) - field_rank(f2);
  3716. }
  /* NOTE(review): per the inline warning this constant is duplicated in
   * upb/handlers.h; the two definitions must be changed together. */
  3717. /* A few implementation details of handlers. We put these here to avoid
  3718. * a def -> handlers dependency. */
  3719. #define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/handlers.h. */
  3720. static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
  3721. return upb_fielddef_isseq(f) ? 2 : 0;
  3722. }
  3723. static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
  3724. uint32_t ret = 1;
  3725. if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */
  3726. if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */
  3727. if (upb_fielddef_issubmsg(f)) {
  3728. /* ENDSUBMSG (STARTSUBMSG is at table beginning) */
  3729. ret += 0;
  3730. if (upb_fielddef_lazy(f)) {
  3731. /* STARTSTR/ENDSTR/STRING (for lazy) */
  3732. ret += 3;
  3733. }
  3734. }
  3735. return ret;
  3736. }
  3737. static void upb_status_setoom(upb_status *status) {
  3738. upb_status_seterrmsg(status, "out of memory");
  3739. }
  3740. static void assign_msg_wellknowntype(upb_msgdef *m) {
  3741. const char *name = upb_msgdef_fullname(m);
  3742. if (name == NULL) {
  3743. m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED;
  3744. return;
  3745. }
  3746. if (!strcmp(name, "google.protobuf.Any")) {
  3747. m->well_known_type = UPB_WELLKNOWN_ANY;
  3748. } else if (!strcmp(name, "google.protobuf.FieldMask")) {
  3749. m->well_known_type = UPB_WELLKNOWN_FIELDMASK;
  3750. } else if (!strcmp(name, "google.protobuf.Duration")) {
  3751. m->well_known_type = UPB_WELLKNOWN_DURATION;
  3752. } else if (!strcmp(name, "google.protobuf.Timestamp")) {
  3753. m->well_known_type = UPB_WELLKNOWN_TIMESTAMP;
  3754. } else if (!strcmp(name, "google.protobuf.DoubleValue")) {
  3755. m->well_known_type = UPB_WELLKNOWN_DOUBLEVALUE;
  3756. } else if (!strcmp(name, "google.protobuf.FloatValue")) {
  3757. m->well_known_type = UPB_WELLKNOWN_FLOATVALUE;
  3758. } else if (!strcmp(name, "google.protobuf.Int64Value")) {
  3759. m->well_known_type = UPB_WELLKNOWN_INT64VALUE;
  3760. } else if (!strcmp(name, "google.protobuf.UInt64Value")) {
  3761. m->well_known_type = UPB_WELLKNOWN_UINT64VALUE;
  3762. } else if (!strcmp(name, "google.protobuf.Int32Value")) {
  3763. m->well_known_type = UPB_WELLKNOWN_INT32VALUE;
  3764. } else if (!strcmp(name, "google.protobuf.UInt32Value")) {
  3765. m->well_known_type = UPB_WELLKNOWN_UINT32VALUE;
  3766. } else if (!strcmp(name, "google.protobuf.BoolValue")) {
  3767. m->well_known_type = UPB_WELLKNOWN_BOOLVALUE;
  3768. } else if (!strcmp(name, "google.protobuf.StringValue")) {
  3769. m->well_known_type = UPB_WELLKNOWN_STRINGVALUE;
  3770. } else if (!strcmp(name, "google.protobuf.BytesValue")) {
  3771. m->well_known_type = UPB_WELLKNOWN_BYTESVALUE;
  3772. } else if (!strcmp(name, "google.protobuf.Value")) {
  3773. m->well_known_type = UPB_WELLKNOWN_VALUE;
  3774. } else if (!strcmp(name, "google.protobuf.ListValue")) {
  3775. m->well_known_type = UPB_WELLKNOWN_LISTVALUE;
  3776. } else if (!strcmp(name, "google.protobuf.Struct")) {
  3777. m->well_known_type = UPB_WELLKNOWN_STRUCT;
  3778. } else {
  3779. m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED;
  3780. }
  3781. }
  3782. /* upb_enumdef ****************************************************************/
  3783. const char *upb_enumdef_fullname(const upb_enumdef *e) {
  3784. return e->full_name;
  3785. }
  3786. const char *upb_enumdef_name(const upb_enumdef *e) {
  3787. return shortdefname(e->full_name);
  3788. }
  3789. const upb_filedef *upb_enumdef_file(const upb_enumdef *e) {
  3790. return e->file;
  3791. }
  3792. int32_t upb_enumdef_default(const upb_enumdef *e) {
  3793. UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
  3794. return e->defaultval;
  3795. }
  3796. int upb_enumdef_numvals(const upb_enumdef *e) {
  3797. return (int)upb_strtable_count(&e->ntoi);
  3798. }
  3799. void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  3800. /* We iterate over the ntoi table, to account for duplicate numbers. */
  3801. upb_strtable_begin(i, &e->ntoi);
  3802. }
  3803. void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
  3804. bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
  3805. bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
  3806. size_t len, int32_t *num) {
  3807. upb_value v;
  3808. if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
  3809. return false;
  3810. }
  3811. if (num) *num = upb_value_getint32(v);
  3812. return true;
  3813. }
  3814. const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
  3815. upb_value v;
  3816. return upb_inttable_lookup32(&def->iton, num, &v) ?
  3817. upb_value_getcstr(v) : NULL;
  3818. }
  3819. const char *upb_enum_iter_name(upb_enum_iter *iter) {
  3820. return upb_strtable_iter_key(iter).data;
  3821. }
  3822. int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  3823. return upb_value_getint32(upb_strtable_iter_value(iter));
  3824. }
  3825. /* upb_fielddef ***************************************************************/
  3826. const char *upb_fielddef_fullname(const upb_fielddef *f) {
  3827. return f->full_name;
  3828. }
  3829. upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
  3830. switch (f->type_) {
  3831. case UPB_DESCRIPTOR_TYPE_DOUBLE:
  3832. return UPB_TYPE_DOUBLE;
  3833. case UPB_DESCRIPTOR_TYPE_FLOAT:
  3834. return UPB_TYPE_FLOAT;
  3835. case UPB_DESCRIPTOR_TYPE_INT64:
  3836. case UPB_DESCRIPTOR_TYPE_SINT64:
  3837. case UPB_DESCRIPTOR_TYPE_SFIXED64:
  3838. return UPB_TYPE_INT64;
  3839. case UPB_DESCRIPTOR_TYPE_INT32:
  3840. case UPB_DESCRIPTOR_TYPE_SFIXED32:
  3841. case UPB_DESCRIPTOR_TYPE_SINT32:
  3842. return UPB_TYPE_INT32;
  3843. case UPB_DESCRIPTOR_TYPE_UINT64:
  3844. case UPB_DESCRIPTOR_TYPE_FIXED64:
  3845. return UPB_TYPE_UINT64;
  3846. case UPB_DESCRIPTOR_TYPE_UINT32:
  3847. case UPB_DESCRIPTOR_TYPE_FIXED32:
  3848. return UPB_TYPE_UINT32;
  3849. case UPB_DESCRIPTOR_TYPE_ENUM:
  3850. return UPB_TYPE_ENUM;
  3851. case UPB_DESCRIPTOR_TYPE_BOOL:
  3852. return UPB_TYPE_BOOL;
  3853. case UPB_DESCRIPTOR_TYPE_STRING:
  3854. return UPB_TYPE_STRING;
  3855. case UPB_DESCRIPTOR_TYPE_BYTES:
  3856. return UPB_TYPE_BYTES;
  3857. case UPB_DESCRIPTOR_TYPE_GROUP:
  3858. case UPB_DESCRIPTOR_TYPE_MESSAGE:
  3859. return UPB_TYPE_MESSAGE;
  3860. }
  3861. UPB_UNREACHABLE();
  3862. }
  3863. upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
  3864. return f->type_;
  3865. }
  3866. uint32_t upb_fielddef_index(const upb_fielddef *f) {
  3867. return f->index_;
  3868. }
  3869. upb_label_t upb_fielddef_label(const upb_fielddef *f) {
  3870. return f->label_;
  3871. }
  3872. uint32_t upb_fielddef_number(const upb_fielddef *f) {
  3873. return f->number_;
  3874. }
  3875. bool upb_fielddef_isextension(const upb_fielddef *f) {
  3876. return f->is_extension_;
  3877. }
  3878. bool upb_fielddef_lazy(const upb_fielddef *f) {
  3879. return f->lazy_;
  3880. }
  3881. bool upb_fielddef_packed(const upb_fielddef *f) {
  3882. return f->packed_;
  3883. }
  3884. const char *upb_fielddef_name(const upb_fielddef *f) {
  3885. return shortdefname(f->full_name);
  3886. }
  3887. const char *upb_fielddef_jsonname(const upb_fielddef *f) {
  3888. return f->json_name;
  3889. }
  3890. uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) {
  3891. return f->selector_base;
  3892. }
  3893. const upb_filedef *upb_fielddef_file(const upb_fielddef *f) {
  3894. return f->file;
  3895. }
  3896. const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
  3897. return f->msgdef;
  3898. }
  3899. const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
  3900. return f->oneof;
  3901. }
  3902. const upb_oneofdef *upb_fielddef_realcontainingoneof(const upb_fielddef *f) {
  3903. if (!f->oneof || upb_oneofdef_issynthetic(f->oneof)) return NULL;
  3904. return f->oneof;
  3905. }
  3906. upb_msgval upb_fielddef_default(const upb_fielddef *f) {
  3907. UPB_ASSERT(!upb_fielddef_issubmsg(f));
  3908. upb_msgval ret;
  3909. if (upb_fielddef_isstring(f)) {
  3910. str_t *str = f->defaultval.str;
  3911. if (str) {
  3912. ret.str_val.data = str->str;
  3913. ret.str_val.size = str->len;
  3914. } else {
  3915. ret.str_val.size = 0;
  3916. }
  3917. } else {
  3918. memcpy(&ret, &f->defaultval, 8);
  3919. }
  3920. return ret;
  3921. }
  3922. static void chkdefaulttype(const upb_fielddef *f, int ctype) {
  3923. UPB_UNUSED(f);
  3924. UPB_UNUSED(ctype);
  3925. }
  3926. int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
  3927. chkdefaulttype(f, UPB_TYPE_INT64);
  3928. return f->defaultval.sint;
  3929. }
  3930. int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
  3931. chkdefaulttype(f, UPB_TYPE_INT32);
  3932. return (int32_t)f->defaultval.sint;
  3933. }
  3934. uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
  3935. chkdefaulttype(f, UPB_TYPE_UINT64);
  3936. return f->defaultval.uint;
  3937. }
  3938. uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
  3939. chkdefaulttype(f, UPB_TYPE_UINT32);
  3940. return (uint32_t)f->defaultval.uint;
  3941. }
  3942. bool upb_fielddef_defaultbool(const upb_fielddef *f) {
  3943. chkdefaulttype(f, UPB_TYPE_BOOL);
  3944. return f->defaultval.boolean;
  3945. }
  3946. float upb_fielddef_defaultfloat(const upb_fielddef *f) {
  3947. chkdefaulttype(f, UPB_TYPE_FLOAT);
  3948. return f->defaultval.flt;
  3949. }
  3950. double upb_fielddef_defaultdouble(const upb_fielddef *f) {
  3951. chkdefaulttype(f, UPB_TYPE_DOUBLE);
  3952. return f->defaultval.dbl;
  3953. }
  3954. const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
  3955. str_t *str = f->defaultval.str;
  3956. UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
  3957. upb_fielddef_type(f) == UPB_TYPE_BYTES ||
  3958. upb_fielddef_type(f) == UPB_TYPE_ENUM);
  3959. if (str) {
  3960. if (len) *len = str->len;
  3961. return str->str;
  3962. } else {
  3963. if (len) *len = 0;
  3964. return NULL;
  3965. }
  3966. }
  3967. const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
  3968. return upb_fielddef_type(f) == UPB_TYPE_MESSAGE ? f->sub.msgdef : NULL;
  3969. }
  3970. const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
  3971. return upb_fielddef_type(f) == UPB_TYPE_ENUM ? f->sub.enumdef : NULL;
  3972. }
  3973. const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f) {
  3974. return &f->msgdef->layout->fields[f->layout_index];
  3975. }
  3976. bool upb_fielddef_issubmsg(const upb_fielddef *f) {
  3977. return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
  3978. }
  3979. bool upb_fielddef_isstring(const upb_fielddef *f) {
  3980. return upb_fielddef_type(f) == UPB_TYPE_STRING ||
  3981. upb_fielddef_type(f) == UPB_TYPE_BYTES;
  3982. }
  3983. bool upb_fielddef_isseq(const upb_fielddef *f) {
  3984. return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
  3985. }
  3986. bool upb_fielddef_isprimitive(const upb_fielddef *f) {
  3987. return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
  3988. }
  3989. bool upb_fielddef_ismap(const upb_fielddef *f) {
  3990. return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
  3991. upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
  3992. }
  3993. bool upb_fielddef_hassubdef(const upb_fielddef *f) {
  3994. return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
  3995. }
  3996. bool upb_fielddef_haspresence(const upb_fielddef *f) {
  3997. if (upb_fielddef_isseq(f)) return false;
  3998. return upb_fielddef_issubmsg(f) || upb_fielddef_containingoneof(f) ||
  3999. f->file->syntax == UPB_SYNTAX_PROTO2;
  4000. }
  /* True when low <= x <= high. */
  static bool between(int32_t x, int32_t low, int32_t high) {
    if (x < low) return false;
    return x <= high;
  }
  /* True when `label` lies in the accepted range 1..3 inclusive. */
  bool upb_fielddef_checklabel(int32_t label) {
    return between(label, 1, 3);
  }
  /* True when `type` lies in the accepted range 1..11 inclusive. */
  bool upb_fielddef_checktype(int32_t type) {
    return between(type, 1, 11);
  }
  /* True when `fmt` lies in the accepted range 1..3 inclusive. */
  bool upb_fielddef_checkintfmt(int32_t fmt) {
    return between(fmt, 1, 3);
  }
  /* True when `type` lies in the accepted range 1..18 inclusive. */
  bool upb_fielddef_checkdescriptortype(int32_t type) { return between(type, 1, 18); }
  4010. /* upb_msgdef *****************************************************************/
  4011. const char *upb_msgdef_fullname(const upb_msgdef *m) {
  4012. return m->full_name;
  4013. }
  4014. const upb_filedef *upb_msgdef_file(const upb_msgdef *m) {
  4015. return m->file;
  4016. }
  4017. const char *upb_msgdef_name(const upb_msgdef *m) {
  4018. return shortdefname(m->full_name);
  4019. }
  4020. upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
  4021. return m->file->syntax;
  4022. }
  4023. size_t upb_msgdef_selectorcount(const upb_msgdef *m) {
  4024. return m->selector_count;
  4025. }
  4026. uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) {
  4027. return m->submsg_field_count;
  4028. }
  4029. const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
  4030. upb_value val;
  4031. return upb_inttable_lookup32(&m->itof, i, &val) ?
  4032. upb_value_getconstptr(val) : NULL;
  4033. }
  4034. const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
  4035. size_t len) {
  4036. upb_value val;
  4037. if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
  4038. return NULL;
  4039. }
  4040. return unpack_def(val, UPB_DEFTYPE_FIELD);
  4041. }
  4042. const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
  4043. size_t len) {
  4044. upb_value val;
  4045. if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
  4046. return NULL;
  4047. }
  4048. return unpack_def(val, UPB_DEFTYPE_ONEOF);
  4049. }
  4050. bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
  4051. const upb_fielddef **f, const upb_oneofdef **o) {
  4052. upb_value val;
  4053. if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
  4054. return false;
  4055. }
  4056. *o = unpack_def(val, UPB_DEFTYPE_ONEOF);
  4057. *f = unpack_def(val, UPB_DEFTYPE_FIELD);
  4058. return *o || *f; /* False if this was a JSON name. */
  4059. }
  4060. const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m,
  4061. const char *name, size_t len) {
  4062. upb_value val;
  4063. const upb_fielddef* f;
  4064. if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
  4065. return NULL;
  4066. }
  4067. f = unpack_def(val, UPB_DEFTYPE_FIELD);
  4068. if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
  4069. return f;
  4070. }
  4071. int upb_msgdef_numfields(const upb_msgdef *m) {
  4072. return m->field_count;
  4073. }
  4074. int upb_msgdef_numoneofs(const upb_msgdef *m) {
  4075. return m->oneof_count;
  4076. }
  4077. int upb_msgdef_numrealoneofs(const upb_msgdef *m) {
  4078. return m->real_oneof_count;
  4079. }
  4080. int upb_msgdef_fieldcount(const upb_msgdef *m) {
  4081. return m->field_count;
  4082. }
  4083. int upb_msgdef_oneofcount(const upb_msgdef *m) {
  4084. return m->oneof_count;
  4085. }
  4086. int upb_msgdef_realoneofcount(const upb_msgdef *m) {
  4087. return m->real_oneof_count;
  4088. }
  4089. const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) {
  4090. return m->layout;
  4091. }
  4092. const upb_fielddef *upb_msgdef_field(const upb_msgdef *m, int i) {
  4093. UPB_ASSERT(i >= 0 && i < m->field_count);
  4094. return &m->fields[i];
  4095. }
  4096. const upb_oneofdef *upb_msgdef_oneof(const upb_msgdef *m, int i) {
  4097. UPB_ASSERT(i >= 0 && i < m->oneof_count);
  4098. return &m->oneofs[i];
  4099. }
  4100. bool upb_msgdef_mapentry(const upb_msgdef *m) {
  4101. return m->map_entry;
  4102. }
  4103. upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) {
  4104. return m->well_known_type;
  4105. }
  4106. bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) {
  4107. upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
  4108. return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
  4109. type <= UPB_WELLKNOWN_UINT32VALUE;
  4110. }
  4111. bool upb_msgdef_iswrapper(const upb_msgdef *m) {
  4112. upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
  4113. return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
  4114. type <= UPB_WELLKNOWN_BOOLVALUE;
  4115. }
  4116. void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
  4117. upb_inttable_begin(iter, &m->itof);
  4118. }
  4119. void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }
  4120. bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  4121. return upb_inttable_done(iter);
  4122. }
  4123. upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
  4124. return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
  4125. }
  4126. void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
  4127. upb_inttable_iter_setdone(iter);
  4128. }
  4129. bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1,
  4130. const upb_msg_field_iter * iter2) {
  4131. return upb_inttable_iter_isequal(iter1, iter2);
  4132. }
  4133. void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
  4134. upb_strtable_begin(iter, &m->ntof);
  4135. /* We need to skip past any initial fields. */
  4136. while (!upb_strtable_done(iter) &&
  4137. !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) {
  4138. upb_strtable_next(iter);
  4139. }
  4140. }
  4141. void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
  4142. /* We need to skip past fields to return only oneofs. */
  4143. do {
  4144. upb_strtable_next(iter);
  4145. } while (!upb_strtable_done(iter) &&
  4146. !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF));
  4147. }
  4148. bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  4149. return upb_strtable_done(iter);
  4150. }
  4151. const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
  4152. return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
  4153. }
  4154. void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
  4155. upb_strtable_iter_setdone(iter);
  4156. }
  4157. bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1,
  4158. const upb_msg_oneof_iter *iter2) {
  4159. return upb_strtable_iter_isequal(iter1, iter2);
  4160. }
  4161. /* upb_oneofdef ***************************************************************/
  4162. const char *upb_oneofdef_name(const upb_oneofdef *o) {
  4163. return shortdefname(o->full_name);
  4164. }
  4165. const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
  4166. return o->parent;
  4167. }
  4168. int upb_oneofdef_fieldcount(const upb_oneofdef *o) {
  4169. return o->field_count;
  4170. }
  4171. const upb_fielddef *upb_oneofdef_field(const upb_oneofdef *o, int i) {
  4172. UPB_ASSERT(i < o->field_count);
  4173. return o->fields[i];
  4174. }
  4175. int upb_oneofdef_numfields(const upb_oneofdef *o) {
  4176. return o->field_count;
  4177. }
  4178. uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
  4179. return o - o->parent->oneofs;
  4180. }
  4181. bool upb_oneofdef_issynthetic(const upb_oneofdef *o) {
  4182. return o->synthetic;
  4183. }
  4184. const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
  4185. const char *name, size_t length) {
  4186. upb_value val;
  4187. return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
  4188. upb_value_getptr(val) : NULL;
  4189. }
  4190. const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
  4191. upb_value val;
  4192. return upb_inttable_lookup32(&o->itof, num, &val) ?
  4193. upb_value_getptr(val) : NULL;
  4194. }
  4195. void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  4196. upb_inttable_begin(iter, &o->itof);
  4197. }
  4198. void upb_oneof_next(upb_oneof_iter *iter) {
  4199. upb_inttable_next(iter);
  4200. }
  4201. bool upb_oneof_done(upb_oneof_iter *iter) {
  4202. return upb_inttable_done(iter);
  4203. }
  4204. upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
  4205. return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
  4206. }
  4207. void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  4208. upb_inttable_iter_setdone(iter);
  4209. }
  4210. /* upb_filedef ****************************************************************/
  4211. const char *upb_filedef_name(const upb_filedef *f) {
  4212. return f->name;
  4213. }
  4214. const char *upb_filedef_package(const upb_filedef *f) {
  4215. return f->package;
  4216. }
  4217. const char *upb_filedef_phpprefix(const upb_filedef *f) {
  4218. return f->phpprefix;
  4219. }
  4220. const char *upb_filedef_phpnamespace(const upb_filedef *f) {
  4221. return f->phpnamespace;
  4222. }
  4223. upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
  4224. return f->syntax;
  4225. }
  4226. int upb_filedef_msgcount(const upb_filedef *f) {
  4227. return f->msg_count;
  4228. }
  4229. int upb_filedef_depcount(const upb_filedef *f) {
  4230. return f->dep_count;
  4231. }
  4232. int upb_filedef_enumcount(const upb_filedef *f) {
  4233. return f->enum_count;
  4234. }
  4235. const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) {
  4236. return i < 0 || i >= f->dep_count ? NULL : f->deps[i];
  4237. }
  4238. const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) {
  4239. return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i];
  4240. }
  4241. const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) {
  4242. return i < 0 || i >= f->enum_count ? NULL : &f->enums[i];
  4243. }
  4244. const upb_symtab *upb_filedef_symtab(const upb_filedef *f) {
  4245. return f->symtab;
  4246. }
  4247. void upb_symtab_free(upb_symtab *s) {
  4248. upb_arena_free(s->arena);
  4249. upb_gfree(s);
  4250. }
  4251. upb_symtab *upb_symtab_new(void) {
  4252. upb_symtab *s = upb_gmalloc(sizeof(*s));
  4253. upb_alloc *alloc;
  4254. if (!s) {
  4255. return NULL;
  4256. }
  4257. s->arena = upb_arena_new();
  4258. s->bytes_loaded = 0;
  4259. alloc = upb_arena_alloc(s->arena);
  4260. if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) ||
  4261. !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) {
  4262. upb_arena_free(s->arena);
  4263. upb_gfree(s);
  4264. s = NULL;
  4265. }
  4266. return s;
  4267. }
  4268. const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
  4269. upb_value v;
  4270. return upb_strtable_lookup(&s->syms, sym, &v) ?
  4271. unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
  4272. }
  4273. const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym,
  4274. size_t len) {
  4275. upb_value v;
  4276. return upb_strtable_lookup2(&s->syms, sym, len, &v) ?
  4277. unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
  4278. }
  4279. const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
  4280. upb_value v;
  4281. return upb_strtable_lookup(&s->syms, sym, &v) ?
  4282. unpack_def(v, UPB_DEFTYPE_ENUM) : NULL;
  4283. }
  4284. const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) {
  4285. upb_value v;
  4286. return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v)
  4287. : NULL;
  4288. }
  4289. const upb_filedef *upb_symtab_lookupfile2(
  4290. const upb_symtab *s, const char *name, size_t len) {
  4291. upb_value v;
  4292. return upb_strtable_lookup2(&s->files, name, len, &v) ?
  4293. upb_value_getconstptr(v) : NULL;
  4294. }
  4295. int upb_symtab_filecount(const upb_symtab *s) {
  4296. return (int)upb_strtable_count(&s->files);
  4297. }
  4298. /* Code to build defs from descriptor protos. *********************************/
  4299. /* There is a question of how much validation to do here. It will be difficult
  4300. * to perfectly match the amount of validation performed by proto2. But since
  4301. * this code is used to directly build defs from Ruby (for example) we do need
  4302. * to validate important constraints like uniqueness of names and numbers. */
/* Evaluates |x| and, if it is false, reports out-of-memory through
 * symtab_oomerr().  Requires a variable named |ctx| to be in scope at the
 * point of use.
 * NOTE(review): the expansion is a bare `if` statement, so using it as the
 * body of an unbraced if/else would capture the else -- keep call sites
 * braced. */
#define CHK_OOM(x) if (!(x)) { symtab_oomerr(ctx); }
/* State threaded through the def-building helpers below.  Errors are recorded
 * in |status| and control leaves the helpers via a long jump on |err| (see
 * symtab_errf() and symtab_oomerr()). */
typedef struct {
  upb_symtab *symtab;
  upb_filedef *file;              /* File we are building. */
  upb_arena *file_arena;          /* Allocate defs here. */
  upb_alloc *alloc;               /* Alloc of file_arena, for tables. */
  const upb_msglayout **layouts;  /* NULL if we should build layouts. */
  upb_status *status;             /* Record errors here. */
  jmp_buf err;                    /* longjmp() on error. */
} symtab_addctx;
  4313. UPB_NORETURN UPB_NOINLINE UPB_PRINTF(2, 3)
  4314. static void symtab_errf(symtab_addctx *ctx, const char *fmt, ...) {
  4315. va_list argp;
  4316. va_start(argp, fmt);
  4317. upb_status_vseterrf(ctx->status, fmt, argp);
  4318. va_end(argp);
  4319. UPB_LONGJMP(ctx->err, 1);
  4320. }
  4321. UPB_NORETURN UPB_NOINLINE
  4322. static void symtab_oomerr(symtab_addctx *ctx) {
  4323. upb_status_setoom(ctx->status);
  4324. UPB_LONGJMP(ctx->err, 1);
  4325. }
  4326. void *symtab_alloc(symtab_addctx *ctx, size_t bytes) {
  4327. void *ret = upb_arena_malloc(ctx->file_arena, bytes);
  4328. if (!ret) symtab_oomerr(ctx);
  4329. return ret;
  4330. }
  4331. static void check_ident(symtab_addctx *ctx, upb_strview name, bool full) {
  4332. const char *str = name.data;
  4333. size_t len = name.size;
  4334. bool start = true;
  4335. size_t i;
  4336. for (i = 0; i < len; i++) {
  4337. char c = str[i];
  4338. if (c == '.') {
  4339. if (start || !full) {
  4340. symtab_errf(ctx, "invalid name: unexpected '.' (%.*s)", (int)len, str);
  4341. }
  4342. start = true;
  4343. } else if (start) {
  4344. if (!upb_isletter(c)) {
  4345. symtab_errf(
  4346. ctx,
  4347. "invalid name: path components must start with a letter (%.*s)",
  4348. (int)len, str);
  4349. }
  4350. start = false;
  4351. } else {
  4352. if (!upb_isalphanum(c)) {
  4353. symtab_errf(ctx, "invalid name: non-alphanumeric character (%.*s)",
  4354. (int)len, str);
  4355. }
  4356. }
  4357. }
  4358. if (start) {
  4359. symtab_errf(ctx, "invalid name: empty part (%.*s)", (int)len, str);
  4360. }
  4361. }
  4362. static size_t div_round_up(size_t n, size_t d) {
  4363. return (n + d - 1) / d;
  4364. }
  4365. static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
  4366. switch (type) {
  4367. case UPB_TYPE_DOUBLE:
  4368. case UPB_TYPE_INT64:
  4369. case UPB_TYPE_UINT64:
  4370. return 8;
  4371. case UPB_TYPE_ENUM:
  4372. case UPB_TYPE_INT32:
  4373. case UPB_TYPE_UINT32:
  4374. case UPB_TYPE_FLOAT:
  4375. return 4;
  4376. case UPB_TYPE_BOOL:
  4377. return 1;
  4378. case UPB_TYPE_MESSAGE:
  4379. return sizeof(void*);
  4380. case UPB_TYPE_BYTES:
  4381. case UPB_TYPE_STRING:
  4382. return sizeof(upb_strview);
  4383. }
  4384. UPB_UNREACHABLE();
  4385. }
  4386. static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
  4387. if (upb_msgdef_mapentry(upb_fielddef_containingtype(f))) {
  4388. upb_map_entry ent;
  4389. UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
  4390. return sizeof(ent.k);
  4391. } else if (upb_fielddef_isseq(f)) {
  4392. return sizeof(void*);
  4393. } else {
  4394. return upb_msgval_sizeof(upb_fielddef_type(f));
  4395. }
  4396. }
  4397. static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) {
  4398. uint32_t ret;
  4399. l->size = UPB_ALIGN_UP(l->size, size);
  4400. ret = l->size;
  4401. l->size += size;
  4402. return ret;
  4403. }
  4404. static int field_number_cmp(const void *p1, const void *p2) {
  4405. const upb_msglayout_field *f1 = p1;
  4406. const upb_msglayout_field *f2 = p2;
  4407. return f1->number - f2->number;
  4408. }
  4409. static void assign_layout_indices(const upb_msgdef *m, upb_msglayout_field *fields) {
  4410. int i;
  4411. int n = upb_msgdef_numfields(m);
  4412. for (i = 0; i < n; i++) {
  4413. upb_fielddef *f = (upb_fielddef*)upb_msgdef_itof(m, fields[i].number);
  4414. UPB_ASSERT(f);
  4415. f->layout_index = i;
  4416. }
  4417. }
/* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
 * It computes a dynamic layout for all of the fields in |m|.
 *
 * The layout object already pointed to by m->layout is zeroed and filled in
 * place.  The field and submessage arrays are allocated through
 * symtab_alloc(), so an allocation failure leaves via symtab_oomerr() rather
 * than returning.  On the non-map path the fields array is finally sorted by
 * field number and each fielddef's layout_index is updated to match. */
static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) {
  upb_msglayout *l = (upb_msglayout*)m->layout;
  upb_msg_field_iter it;
  upb_msg_oneof_iter oit;
  size_t hasbit;
  size_t submsg_count = m->submsg_field_count;
  const upb_msglayout **submsgs;
  upb_msglayout_field *fields;

  /* Clears the layout header plus one trailing fast-table entry. */
  memset(l, 0, sizeof(*l) + sizeof(_upb_fasttable_entry));

  fields = symtab_alloc(ctx, upb_msgdef_numfields(m) * sizeof(*fields));
  submsgs = symtab_alloc(ctx, submsg_count * sizeof(*submsgs));

  l->field_count = upb_msgdef_numfields(m);
  l->fields = fields;
  l->submsgs = submsgs;
  l->table_mask = 0;

  /* TODO(haberman): initialize fast tables so that reflection-based parsing
   * can get the same speeds as linked-in types. */
  l->fasttable[0].field_parser = &fastdecode_generic;
  l->fasttable[0].field_data = 0;

  if (upb_msgdef_mapentry(m)) {
    /* TODO(haberman): refactor this method so this special case is more
     * elegant. */
    /* NOTE(review): this path writes fields[0] and fields[1] and dereferences
     * the defs for field numbers 1 and 2, but |fields| was sized from
     * upb_msgdef_numfields(m).  It appears to assume a map-entry message
     * always has exactly those two fields -- confirm that is validated before
     * this point. */
    const upb_fielddef *key = upb_msgdef_itof(m, 1);
    const upb_fielddef *val = upb_msgdef_itof(m, 2);
    fields[0].number = 1;
    fields[1].number = 2;
    fields[0].label = UPB_LABEL_OPTIONAL;
    fields[1].label = UPB_LABEL_OPTIONAL;
    fields[0].presence = 0;
    fields[1].presence = 0;
    fields[0].descriptortype = upb_fielddef_descriptortype(key);
    fields[1].descriptortype = upb_fielddef_descriptortype(val);
    /* Key and value each occupy one upb_strview-sized slot. */
    fields[0].offset = 0;
    fields[1].offset = sizeof(upb_strview);
    fields[1].submsg_index = 0;

    if (upb_fielddef_type(val) == UPB_TYPE_MESSAGE) {
      submsgs[0] = upb_fielddef_msgsubdef(val)->layout;
    }

    l->field_count = 2;
    l->size = 2 * sizeof(upb_strview);
    l->size = UPB_ALIGN_UP(l->size, 8);
    return;
  }

  /* Allocate data offsets in three stages:
   *
   * 1. hasbits.
   * 2. regular fields.
   * 3. oneof fields.
   *
   * OPT: There is a lot of room for optimization here to minimize the size.
   */

  /* Allocate hasbits and set basic field attributes. */
  /* submsg_count is reused from here on as the running index of the next
   * free slot in |submsgs|. */
  submsg_count = 0;
  for (upb_msg_field_begin(&it, m), hasbit = 0;
       !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    upb_fielddef* f = upb_msg_iter_field(&it);
    upb_msglayout_field *field = &fields[upb_fielddef_index(f)];

    field->number = upb_fielddef_number(f);
    field->descriptortype = upb_fielddef_descriptortype(f);
    field->label = upb_fielddef_label(f);

    if (field->descriptortype == UPB_DTYPE_STRING &&
        f->file->syntax == UPB_SYNTAX_PROTO2) {
      /* See TableDescriptorType() in upbc/generator.cc for details and
       * rationale. */
      field->descriptortype = UPB_DTYPE_BYTES;
    }

    /* Map and packed fields get internal label values in place of the label
     * taken from the fielddef. */
    if (upb_fielddef_ismap(f)) {
      field->label = _UPB_LABEL_MAP;
    } else if (upb_fielddef_packed(f)) {
      field->label = _UPB_LABEL_PACKED;
    }

    if (upb_fielddef_issubmsg(f)) {
      const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
      field->submsg_index = submsg_count++;
      submsgs[field->submsg_index] = subm->layout;
    }

    if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) {
      /* We don't use hasbit 0, so that 0 can indicate "no presence" in the
       * table. This wastes one hasbit, but we don't worry about it for now. */
      field->presence = ++hasbit;
    } else {
      field->presence = 0;
    }
  }

  /* Account for space used by hasbits. */
  l->size = div_round_up(hasbit, 8);

  /* Allocate non-oneof fields. */
  for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef* f = upb_msg_iter_field(&it);
    size_t field_size = upb_msg_fielddefsize(f);
    size_t index = upb_fielddef_index(f);

    if (upb_fielddef_realcontainingoneof(f)) {
      /* Oneofs are handled separately below. */
      continue;
    }

    fields[index].offset = upb_msglayout_place(l, field_size);
  }

  /* Allocate oneof fields.  Each oneof field consists of a uint32 for the case
   * and space for the actual data. */
  for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
       upb_msg_oneof_next(&oit)) {
    const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
    upb_oneof_iter fit;

    size_t case_size = sizeof(uint32_t);  /* Could potentially optimize this. */
    size_t field_size = 0;
    uint32_t case_offset;
    uint32_t data_offset;

    /* Synthetic oneofs get no case/data storage here. */
    if (upb_oneofdef_issynthetic(o)) continue;

    /* Calculate field size: the max of all field sizes. */
    for (upb_oneof_begin(&fit, o);
         !upb_oneof_done(&fit);
         upb_oneof_next(&fit)) {
      const upb_fielddef* f = upb_oneof_iter_field(&fit);
      field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
    }

    /* Align and allocate case offset. */
    case_offset = upb_msglayout_place(l, case_size);
    data_offset = upb_msglayout_place(l, field_size);

    /* Every member of the oneof shares the same data slot; presence stores
     * the bitwise complement of the case offset. */
    for (upb_oneof_begin(&fit, o);
         !upb_oneof_done(&fit);
         upb_oneof_next(&fit)) {
      const upb_fielddef* f = upb_oneof_iter_field(&fit);
      fields[upb_fielddef_index(f)].offset = data_offset;
      fields[upb_fielddef_index(f)].presence = ~case_offset;
    }
  }

  /* Size of the entire structure should be a multiple of its greatest
   * alignment.  TODO: track overall alignment for real? */
  l->size = UPB_ALIGN_UP(l->size, 8);

  /* Sort fields by number. */
  qsort(fields, upb_msgdef_numfields(m), sizeof(*fields), field_number_cmp);
  assign_layout_indices(m, fields);
}
  4555. static void assign_msg_indices(symtab_addctx *ctx, upb_msgdef *m) {
  4556. /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the
  4557. * lowest indexes, but we do not publicly guarantee this. */
  4558. upb_msg_field_iter j;
  4559. int i;
  4560. uint32_t selector;
  4561. int n = upb_msgdef_numfields(m);
  4562. upb_fielddef **fields;
  4563. if (n == 0) {
  4564. m->selector_count = UPB_STATIC_SELECTOR_COUNT;
  4565. m->submsg_field_count = 0;
  4566. return;
  4567. }
  4568. fields = upb_gmalloc(n * sizeof(*fields));
  4569. m->submsg_field_count = 0;
  4570. for(i = 0, upb_msg_field_begin(&j, m);
  4571. !upb_msg_field_done(&j);
  4572. upb_msg_field_next(&j), i++) {
  4573. upb_fielddef *f = upb_msg_iter_field(&j);
  4574. UPB_ASSERT(f->msgdef == m);
  4575. if (upb_fielddef_issubmsg(f)) {
  4576. m->submsg_field_count++;
  4577. }
  4578. fields[i] = f;
  4579. }
  4580. qsort(fields, n, sizeof(*fields), cmp_fields);
  4581. selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
  4582. for (i = 0; i < n; i++) {
  4583. upb_fielddef *f = fields[i];
  4584. f->index_ = i;
  4585. f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
  4586. selector += upb_handlers_selectorcount(f);
  4587. }
  4588. m->selector_count = selector;
  4589. upb_gfree(fields);
  4590. }
  4591. static char *strviewdup(symtab_addctx *ctx, upb_strview view) {
  4592. return upb_strdup2(view.data, view.size, ctx->alloc);
  4593. }
  4594. static bool streql2(const char *a, size_t n, const char *b) {
  4595. return n == strlen(b) && memcmp(a, b, n) == 0;
  4596. }
  4597. static bool streql_view(upb_strview view, const char *b) {
  4598. return streql2(view.data, view.size, b);
  4599. }
  4600. static const char *makefullname(symtab_addctx *ctx, const char *prefix,
  4601. upb_strview name) {
  4602. if (prefix) {
  4603. /* ret = prefix + '.' + name; */
  4604. size_t n = strlen(prefix);
  4605. char *ret = symtab_alloc(ctx, n + name.size + 2);
  4606. strcpy(ret, prefix);
  4607. ret[n] = '.';
  4608. memcpy(&ret[n + 1], name.data, name.size);
  4609. ret[n + 1 + name.size] = '\0';
  4610. return ret;
  4611. } else {
  4612. return strviewdup(ctx, name);
  4613. }
  4614. }
  4615. static void finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) {
  4616. int i;
  4617. int synthetic_count = 0;
  4618. upb_oneofdef *mutable_oneofs = (upb_oneofdef*)m->oneofs;
  4619. for (i = 0; i < m->oneof_count; i++) {
  4620. upb_oneofdef *o = &mutable_oneofs[i];
  4621. if (o->synthetic && o->field_count != 1) {
  4622. symtab_errf(ctx, "Synthetic oneofs must have one field, not %d: %s",
  4623. o->field_count, upb_oneofdef_name(o));
  4624. }
  4625. if (o->synthetic) {
  4626. synthetic_count++;
  4627. } else if (synthetic_count != 0) {
  4628. symtab_errf(ctx, "Synthetic oneofs must be after all other oneofs: %s",
  4629. upb_oneofdef_name(o));
  4630. }
  4631. o->fields = symtab_alloc(ctx, sizeof(upb_fielddef *) * o->field_count);
  4632. o->field_count = 0;
  4633. }
  4634. for (i = 0; i < m->field_count; i++) {
  4635. const upb_fielddef *f = &m->fields[i];
  4636. upb_oneofdef *o = (upb_oneofdef*)f->oneof;
  4637. if (o) {
  4638. o->fields[o->field_count++] = f;
  4639. }
  4640. }
  4641. m->real_oneof_count = m->oneof_count - synthetic_count;
  4642. }
  4643. size_t getjsonname(const char *name, char *buf, size_t len) {
  4644. size_t src, dst = 0;
  4645. bool ucase_next = false;
  4646. #define WRITE(byte) \
  4647. ++dst; \
  4648. if (dst < len) buf[dst - 1] = byte; \
  4649. else if (dst == len) buf[dst - 1] = '\0'
  4650. if (!name) {
  4651. WRITE('\0');
  4652. return 0;
  4653. }
  4654. /* Implement the transformation as described in the spec:
  4655. * 1. upper case all letters after an underscore.
  4656. * 2. remove all underscores.
  4657. */
  4658. for (src = 0; name[src]; src++) {
  4659. if (name[src] == '_') {
  4660. ucase_next = true;
  4661. continue;
  4662. }
  4663. if (ucase_next) {
  4664. WRITE(toupper(name[src]));
  4665. ucase_next = false;
  4666. } else {
  4667. WRITE(name[src]);
  4668. }
  4669. }
  4670. WRITE('\0');
  4671. return dst;
  4672. #undef WRITE
  4673. }
  4674. static char* makejsonname(symtab_addctx *ctx, const char* name) {
  4675. size_t size = getjsonname(name, NULL, 0);
  4676. char* json_name = symtab_alloc(ctx, size);
  4677. getjsonname(name, json_name, size);
  4678. return json_name;
  4679. }
  4680. static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) {
  4681. if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) {
  4682. symtab_errf(ctx, "duplicate symbol '%s'", name);
  4683. }
  4684. upb_alloc *alloc = upb_arena_alloc(ctx->symtab->arena);
  4685. size_t len = strlen(name);
  4686. CHK_OOM(upb_strtable_insert3(&ctx->symtab->syms, name, len, v, alloc));
  4687. }
  4688. /* Given a symbol and the base symbol inside which it is defined, find the
  4689. * symbol's definition in t. */
  4690. static const void *symtab_resolve(symtab_addctx *ctx, const upb_fielddef *f,
  4691. const char *base, upb_strview sym,
  4692. upb_deftype_t type) {
  4693. const upb_strtable *t = &ctx->symtab->syms;
  4694. if(sym.size == 0) goto notfound;
  4695. if(sym.data[0] == '.') {
  4696. /* Symbols starting with '.' are absolute, so we do a single lookup.
  4697. * Slice to omit the leading '.' */
  4698. upb_value v;
  4699. if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
  4700. goto notfound;
  4701. }
  4702. const void *ret = unpack_def(v, type);
  4703. if (!ret) {
  4704. symtab_errf(ctx, "type mismatch when resolving field %s, name %s",
  4705. f->full_name, sym.data);
  4706. }
  4707. return ret;
  4708. } else {
  4709. /* Remove components from base until we find an entry or run out.
  4710. * TODO: This branch is totally broken, but currently not used. */
  4711. (void)base;
  4712. UPB_ASSERT(false);
  4713. goto notfound;
  4714. }
  4715. notfound:
  4716. symtab_errf(ctx, "couldn't resolve name '%s'", sym.data);
  4717. }
  4718. static void create_oneofdef(
  4719. symtab_addctx *ctx, upb_msgdef *m,
  4720. const google_protobuf_OneofDescriptorProto *oneof_proto) {
  4721. upb_oneofdef *o;
  4722. upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto);
  4723. upb_value v;
  4724. o = (upb_oneofdef*)&m->oneofs[m->oneof_count++];
  4725. o->parent = m;
  4726. o->full_name = makefullname(ctx, m->full_name, name);
  4727. o->field_count = 0;
  4728. o->synthetic = false;
  4729. v = pack_def(o, UPB_DEFTYPE_ONEOF);
  4730. symtab_add(ctx, o->full_name, v);
  4731. CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc));
  4732. CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  4733. CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, 4, ctx->alloc));
  4734. }
  4735. static str_t *newstr(symtab_addctx *ctx, const char *data, size_t len) {
  4736. str_t *ret = symtab_alloc(ctx, sizeof(*ret) + len);
  4737. if (!ret) return NULL;
  4738. ret->len = len;
  4739. if (len) memcpy(ret->str, data, len);
  4740. ret->str[len] = '\0';
  4741. return ret;
  4742. }
  4743. static void parse_default(symtab_addctx *ctx, const char *str, size_t len,
  4744. upb_fielddef *f) {
  4745. char *end;
  4746. char nullz[64];
  4747. errno = 0;
  4748. switch (upb_fielddef_type(f)) {
  4749. case UPB_TYPE_INT32:
  4750. case UPB_TYPE_INT64:
  4751. case UPB_TYPE_UINT32:
  4752. case UPB_TYPE_UINT64:
  4753. case UPB_TYPE_DOUBLE:
  4754. case UPB_TYPE_FLOAT:
  4755. /* Standard C number parsing functions expect null-terminated strings. */
  4756. if (len >= sizeof(nullz) - 1) {
  4757. symtab_errf(ctx, "Default too long: %.*s", (int)len, str);
  4758. }
  4759. memcpy(nullz, str, len);
  4760. nullz[len] = '\0';
  4761. str = nullz;
  4762. break;
  4763. default:
  4764. break;
  4765. }
  4766. switch (upb_fielddef_type(f)) {
  4767. case UPB_TYPE_INT32: {
  4768. long val = strtol(str, &end, 0);
  4769. if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) {
  4770. goto invalid;
  4771. }
  4772. f->defaultval.sint = val;
  4773. break;
  4774. }
  4775. case UPB_TYPE_ENUM: {
  4776. const upb_enumdef *e = f->sub.enumdef;
  4777. int32_t val;
  4778. if (!upb_enumdef_ntoi(e, str, len, &val)) {
  4779. goto invalid;
  4780. }
  4781. f->defaultval.sint = val;
  4782. break;
  4783. }
  4784. case UPB_TYPE_INT64: {
  4785. /* XXX: Need to write our own strtoll, since it's not available in c89. */
  4786. int64_t val = strtol(str, &end, 0);
  4787. if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) {
  4788. goto invalid;
  4789. }
  4790. f->defaultval.sint = val;
  4791. break;
  4792. }
  4793. case UPB_TYPE_UINT32: {
  4794. unsigned long val = strtoul(str, &end, 0);
  4795. if (val > UINT32_MAX || errno == ERANGE || *end) {
  4796. goto invalid;
  4797. }
  4798. f->defaultval.uint = val;
  4799. break;
  4800. }
  4801. case UPB_TYPE_UINT64: {
  4802. /* XXX: Need to write our own strtoull, since it's not available in c89. */
  4803. uint64_t val = strtoul(str, &end, 0);
  4804. if (val > UINT64_MAX || errno == ERANGE || *end) {
  4805. goto invalid;
  4806. }
  4807. f->defaultval.uint = val;
  4808. break;
  4809. }
  4810. case UPB_TYPE_DOUBLE: {
  4811. double val = strtod(str, &end);
  4812. if (errno == ERANGE || *end) {
  4813. goto invalid;
  4814. }
  4815. f->defaultval.dbl = val;
  4816. break;
  4817. }
  4818. case UPB_TYPE_FLOAT: {
  4819. /* XXX: Need to write our own strtof, since it's not available in c89. */
  4820. float val = strtod(str, &end);
  4821. if (errno == ERANGE || *end) {
  4822. goto invalid;
  4823. }
  4824. f->defaultval.flt = val;
  4825. break;
  4826. }
  4827. case UPB_TYPE_BOOL: {
  4828. if (streql2(str, len, "false")) {
  4829. f->defaultval.boolean = false;
  4830. } else if (streql2(str, len, "true")) {
  4831. f->defaultval.boolean = true;
  4832. } else {
  4833. }
  4834. break;
  4835. }
  4836. case UPB_TYPE_STRING:
  4837. f->defaultval.str = newstr(ctx, str, len);
  4838. break;
  4839. case UPB_TYPE_BYTES:
  4840. /* XXX: need to interpret the C-escaped value. */
  4841. f->defaultval.str = newstr(ctx, str, len);
  4842. break;
  4843. case UPB_TYPE_MESSAGE:
  4844. /* Should not have a default value. */
  4845. symtab_errf(ctx, "Message should not have a default (%s)",
  4846. upb_fielddef_fullname(f));
  4847. }
  4848. return;
  4849. invalid:
  4850. symtab_errf(ctx, "Invalid default '%.*s' for field %s", (int)len, str,
  4851. upb_fielddef_fullname(f));
  4852. }
  4853. static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) {
  4854. switch (upb_fielddef_type(f)) {
  4855. case UPB_TYPE_INT32:
  4856. case UPB_TYPE_INT64:
  4857. case UPB_TYPE_ENUM:
  4858. f->defaultval.sint = 0;
  4859. break;
  4860. case UPB_TYPE_UINT64:
  4861. case UPB_TYPE_UINT32:
  4862. f->defaultval.uint = 0;
  4863. break;
  4864. case UPB_TYPE_DOUBLE:
  4865. case UPB_TYPE_FLOAT:
  4866. f->defaultval.dbl = 0;
  4867. break;
  4868. case UPB_TYPE_STRING:
  4869. case UPB_TYPE_BYTES:
  4870. f->defaultval.str = newstr(ctx, NULL, 0);
  4871. break;
  4872. case UPB_TYPE_BOOL:
  4873. f->defaultval.boolean = false;
  4874. break;
  4875. case UPB_TYPE_MESSAGE:
  4876. break;
  4877. }
  4878. }
  4879. static void create_fielddef(
  4880. symtab_addctx *ctx, const char *prefix, upb_msgdef *m,
  4881. const google_protobuf_FieldDescriptorProto *field_proto) {
  4882. upb_alloc *alloc = ctx->alloc;
  4883. upb_fielddef *f;
  4884. const google_protobuf_FieldOptions *options;
  4885. upb_strview name;
  4886. const char *full_name;
  4887. const char *json_name;
  4888. const char *shortname;
  4889. uint32_t field_number;
  4890. if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
  4891. symtab_errf(ctx, "field has no name (%s)", upb_msgdef_fullname(m));
  4892. }
  4893. name = google_protobuf_FieldDescriptorProto_name(field_proto);
  4894. check_ident(ctx, name, false);
  4895. full_name = makefullname(ctx, prefix, name);
  4896. shortname = shortdefname(full_name);
  4897. if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
  4898. json_name = strviewdup(
  4899. ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
  4900. } else {
  4901. json_name = makejsonname(ctx, shortname);
  4902. }
  4903. field_number = google_protobuf_FieldDescriptorProto_number(field_proto);
  4904. if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) {
  4905. symtab_errf(ctx, "invalid field number (%u)", field_number);
  4906. }
  4907. if (m) {
  4908. /* direct message field. */
  4909. upb_value v, field_v, json_v;
  4910. size_t json_size;
  4911. f = (upb_fielddef*)&m->fields[m->field_count++];
  4912. f->msgdef = m;
  4913. f->is_extension_ = false;
  4914. if (upb_strtable_lookup(&m->ntof, shortname, NULL)) {
  4915. symtab_errf(ctx, "duplicate field name (%s)", shortname);
  4916. }
  4917. if (upb_strtable_lookup(&m->ntof, json_name, NULL)) {
  4918. symtab_errf(ctx, "duplicate json_name (%s)", json_name);
  4919. }
  4920. if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
  4921. symtab_errf(ctx, "duplicate field number (%u)", field_number);
  4922. }
  4923. field_v = pack_def(f, UPB_DEFTYPE_FIELD);
  4924. json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
  4925. v = upb_value_constptr(f);
  4926. json_size = strlen(json_name);
  4927. CHK_OOM(
  4928. upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc));
  4929. CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc));
  4930. if (strcmp(shortname, json_name) != 0) {
  4931. upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc);
  4932. }
  4933. if (ctx->layouts) {
  4934. const upb_msglayout_field *fields = m->layout->fields;
  4935. int count = m->layout->field_count;
  4936. bool found = false;
  4937. int i;
  4938. for (i = 0; i < count; i++) {
  4939. if (fields[i].number == field_number) {
  4940. f->layout_index = i;
  4941. found = true;
  4942. break;
  4943. }
  4944. }
  4945. UPB_ASSERT(found);
  4946. }
  4947. } else {
  4948. /* extension field. */
  4949. f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++];
  4950. f->is_extension_ = true;
  4951. symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD));
  4952. }
  4953. f->full_name = full_name;
  4954. f->json_name = json_name;
  4955. f->file = ctx->file;
  4956. f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
  4957. f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
  4958. f->number_ = field_number;
  4959. f->oneof = NULL;
  4960. f->proto3_optional_ =
  4961. google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);
  4962. /* We can't resolve the subdef or (in the case of extensions) the containing
  4963. * message yet, because it may not have been defined yet. We stash a pointer
  4964. * to the field_proto until later when we can properly resolve it. */
  4965. f->sub.unresolved = field_proto;
  4966. if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) {
  4967. symtab_errf(ctx, "proto3 fields cannot be required (%s)", f->full_name);
  4968. }
  4969. if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
  4970. int oneof_index =
  4971. google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
  4972. upb_oneofdef *oneof;
  4973. upb_value v = upb_value_constptr(f);
  4974. if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
  4975. symtab_errf(ctx, "fields in oneof must have OPTIONAL label (%s)",
  4976. f->full_name);
  4977. }
  4978. if (!m) {
  4979. symtab_errf(ctx, "oneof_index provided for extension field (%s)",
  4980. f->full_name);
  4981. }
  4982. if (oneof_index >= m->oneof_count) {
  4983. symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name);
  4984. }
  4985. oneof = (upb_oneofdef*)&m->oneofs[oneof_index];
  4986. f->oneof = oneof;
  4987. oneof->field_count++;
  4988. if (f->proto3_optional_) {
  4989. oneof->synthetic = true;
  4990. }
  4991. CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc));
  4992. CHK_OOM(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc));
  4993. } else {
  4994. f->oneof = NULL;
  4995. if (f->proto3_optional_) {
  4996. symtab_errf(ctx, "field with proto3_optional was not in a oneof (%s)",
  4997. f->full_name);
  4998. }
  4999. }
  5000. options = google_protobuf_FieldDescriptorProto_has_options(field_proto) ?
  5001. google_protobuf_FieldDescriptorProto_options(field_proto) : NULL;
  5002. if (options && google_protobuf_FieldOptions_has_packed(options)) {
  5003. f->packed_ = google_protobuf_FieldOptions_packed(options);
  5004. } else {
  5005. /* Repeated fields default to packed for proto3 only. */
  5006. f->packed_ = upb_fielddef_isprimitive(f) &&
  5007. f->label_ == UPB_LABEL_REPEATED && f->file->syntax == UPB_SYNTAX_PROTO3;
  5008. }
  5009. if (options) {
  5010. f->lazy_ = google_protobuf_FieldOptions_lazy(options);
  5011. } else {
  5012. f->lazy_ = false;
  5013. }
  5014. }
  5015. static void create_enumdef(
  5016. symtab_addctx *ctx, const char *prefix,
  5017. const google_protobuf_EnumDescriptorProto *enum_proto) {
  5018. upb_enumdef *e;
  5019. const google_protobuf_EnumValueDescriptorProto *const *values;
  5020. upb_strview name;
  5021. size_t i, n;
  5022. name = google_protobuf_EnumDescriptorProto_name(enum_proto);
  5023. check_ident(ctx, name, false);
  5024. e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++];
  5025. e->full_name = makefullname(ctx, prefix, name);
  5026. symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM));
  5027. values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
  5028. CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, n, ctx->alloc));
  5029. CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc));
  5030. e->file = ctx->file;
  5031. e->defaultval = 0;
  5032. if (n == 0) {
  5033. symtab_errf(ctx, "enums must contain at least one value (%s)",
  5034. e->full_name);
  5035. }
  5036. for (i = 0; i < n; i++) {
  5037. const google_protobuf_EnumValueDescriptorProto *value = values[i];
  5038. upb_strview name = google_protobuf_EnumValueDescriptorProto_name(value);
  5039. char *name2 = strviewdup(ctx, name);
  5040. int32_t num = google_protobuf_EnumValueDescriptorProto_number(value);
  5041. upb_value v = upb_value_int32(num);
  5042. if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) {
  5043. symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)",
  5044. e->full_name);
  5045. }
  5046. if (upb_strtable_lookup(&e->ntoi, name2, NULL)) {
  5047. symtab_errf(ctx, "duplicate enum label '%s'", name2);
  5048. }
  5049. CHK_OOM(name2)
  5050. CHK_OOM(
  5051. upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc));
  5052. if (!upb_inttable_lookup(&e->iton, num, NULL)) {
  5053. upb_value v = upb_value_cstr(name2);
  5054. CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc));
  5055. }
  5056. }
  5057. upb_inttable_compact2(&e->iton, ctx->alloc);
  5058. }
  5059. static void create_msgdef(symtab_addctx *ctx, const char *prefix,
  5060. const google_protobuf_DescriptorProto *msg_proto) {
  5061. upb_msgdef *m;
  5062. const google_protobuf_MessageOptions *options;
  5063. const google_protobuf_OneofDescriptorProto *const *oneofs;
  5064. const google_protobuf_FieldDescriptorProto *const *fields;
  5065. const google_protobuf_EnumDescriptorProto *const *enums;
  5066. const google_protobuf_DescriptorProto *const *msgs;
  5067. size_t i, n_oneof, n_field, n;
  5068. upb_strview name;
  5069. name = google_protobuf_DescriptorProto_name(msg_proto);
  5070. check_ident(ctx, name, false);
  5071. m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++];
  5072. m->full_name = makefullname(ctx, prefix, name);
  5073. symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG));
  5074. oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof);
  5075. fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field);
  5076. CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  5077. CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, n_oneof + n_field,
  5078. ctx->alloc));
  5079. m->file = ctx->file;
  5080. m->map_entry = false;
  5081. options = google_protobuf_DescriptorProto_options(msg_proto);
  5082. if (options) {
  5083. m->map_entry = google_protobuf_MessageOptions_map_entry(options);
  5084. }
  5085. if (ctx->layouts) {
  5086. m->layout = *ctx->layouts;
  5087. ctx->layouts++;
  5088. } else {
  5089. /* Allocate now (to allow cross-linking), populate later. */
  5090. m->layout = symtab_alloc(
  5091. ctx, sizeof(*m->layout) + sizeof(_upb_fasttable_entry));
  5092. }
  5093. m->oneof_count = 0;
  5094. m->oneofs = symtab_alloc(ctx, sizeof(*m->oneofs) * n_oneof);
  5095. for (i = 0; i < n_oneof; i++) {
  5096. create_oneofdef(ctx, m, oneofs[i]);
  5097. }
  5098. m->field_count = 0;
  5099. m->fields = symtab_alloc(ctx, sizeof(*m->fields) * n_field);
  5100. for (i = 0; i < n_field; i++) {
  5101. create_fielddef(ctx, m->full_name, m, fields[i]);
  5102. }
  5103. assign_msg_indices(ctx, m);
  5104. finalize_oneofs(ctx, m);
  5105. assign_msg_wellknowntype(m);
  5106. upb_inttable_compact2(&m->itof, ctx->alloc);
  5107. /* This message is built. Now build nested messages and enums. */
  5108. enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
  5109. for (i = 0; i < n; i++) {
  5110. create_enumdef(ctx, m->full_name, enums[i]);
  5111. }
  5112. msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
  5113. for (i = 0; i < n; i++) {
  5114. create_msgdef(ctx, m->full_name, msgs[i]);
  5115. }
  5116. }
  5117. static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto,
  5118. upb_filedef *file) {
  5119. const google_protobuf_DescriptorProto *const *msgs;
  5120. size_t i, n;
  5121. file->msg_count++;
  5122. msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
  5123. for (i = 0; i < n; i++) {
  5124. count_types_in_msg(msgs[i], file);
  5125. }
  5126. google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
  5127. file->enum_count += n;
  5128. google_protobuf_DescriptorProto_extension(msg_proto, &n);
  5129. file->ext_count += n;
  5130. }
  5131. static void count_types_in_file(
  5132. const google_protobuf_FileDescriptorProto *file_proto,
  5133. upb_filedef *file) {
  5134. const google_protobuf_DescriptorProto *const *msgs;
  5135. size_t i, n;
  5136. msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
  5137. for (i = 0; i < n; i++) {
  5138. count_types_in_msg(msgs[i], file);
  5139. }
  5140. google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
  5141. file->enum_count += n;
  5142. google_protobuf_FileDescriptorProto_extension(file_proto, &n);
  5143. file->ext_count += n;
  5144. }
  5145. static void resolve_fielddef(symtab_addctx *ctx, const char *prefix,
  5146. upb_fielddef *f) {
  5147. upb_strview name;
  5148. const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved;
  5149. if (f->is_extension_) {
  5150. if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) {
  5151. symtab_errf(ctx, "extension for field '%s' had no extendee",
  5152. f->full_name);
  5153. }
  5154. name = google_protobuf_FieldDescriptorProto_extendee(field_proto);
  5155. f->msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG);
  5156. }
  5157. if ((upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) &&
  5158. !google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) {
  5159. symtab_errf(ctx, "field '%s' is missing type name", f->full_name);
  5160. }
  5161. name = google_protobuf_FieldDescriptorProto_type_name(field_proto);
  5162. if (upb_fielddef_issubmsg(f)) {
  5163. f->sub.msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG);
  5164. } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
  5165. f->sub.enumdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_ENUM);
  5166. }
  5167. /* Have to delay resolving of the default value until now because of the enum
  5168. * case, since enum defaults are specified with a label. */
  5169. if (google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) {
  5170. upb_strview defaultval =
  5171. google_protobuf_FieldDescriptorProto_default_value(field_proto);
  5172. if (f->file->syntax == UPB_SYNTAX_PROTO3) {
  5173. symtab_errf(ctx, "proto3 fields cannot have explicit defaults (%s)",
  5174. f->full_name);
  5175. }
  5176. if (upb_fielddef_issubmsg(f)) {
  5177. symtab_errf(ctx, "message fields cannot have explicit defaults (%s)",
  5178. f->full_name);
  5179. }
  5180. parse_default(ctx, defaultval.data, defaultval.size, f);
  5181. } else {
  5182. set_default_default(ctx, f);
  5183. }
  5184. }
  5185. static void build_filedef(
  5186. symtab_addctx *ctx, upb_filedef *file,
  5187. const google_protobuf_FileDescriptorProto *file_proto) {
  5188. const google_protobuf_FileOptions *file_options_proto;
  5189. const google_protobuf_DescriptorProto *const *msgs;
  5190. const google_protobuf_EnumDescriptorProto *const *enums;
  5191. const google_protobuf_FieldDescriptorProto *const *exts;
  5192. const upb_strview* strs;
  5193. size_t i, n;
  5194. count_types_in_file(file_proto, file);
  5195. file->msgs = symtab_alloc(ctx, sizeof(*file->msgs) * file->msg_count);
  5196. file->enums = symtab_alloc(ctx, sizeof(*file->enums) * file->enum_count);
  5197. file->exts = symtab_alloc(ctx, sizeof(*file->exts) * file->ext_count);
  5198. /* We increment these as defs are added. */
  5199. file->msg_count = 0;
  5200. file->enum_count = 0;
  5201. file->ext_count = 0;
  5202. if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
  5203. symtab_errf(ctx, "File has no name");
  5204. }
  5205. file->name =
  5206. strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
  5207. file->phpprefix = NULL;
  5208. file->phpnamespace = NULL;
  5209. if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
  5210. upb_strview package =
  5211. google_protobuf_FileDescriptorProto_package(file_proto);
  5212. check_ident(ctx, package, true);
  5213. file->package = strviewdup(ctx, package);
  5214. } else {
  5215. file->package = NULL;
  5216. }
  5217. if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
  5218. upb_strview syntax =
  5219. google_protobuf_FileDescriptorProto_syntax(file_proto);
  5220. if (streql_view(syntax, "proto2")) {
  5221. file->syntax = UPB_SYNTAX_PROTO2;
  5222. } else if (streql_view(syntax, "proto3")) {
  5223. file->syntax = UPB_SYNTAX_PROTO3;
  5224. } else {
  5225. symtab_errf(ctx, "Invalid syntax '" UPB_STRVIEW_FORMAT "'",
  5226. UPB_STRVIEW_ARGS(syntax));
  5227. }
  5228. } else {
  5229. file->syntax = UPB_SYNTAX_PROTO2;
  5230. }
  5231. /* Read options. */
  5232. file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto);
  5233. if (file_options_proto) {
  5234. if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) {
  5235. file->phpprefix = strviewdup(
  5236. ctx,
  5237. google_protobuf_FileOptions_php_class_prefix(file_options_proto));
  5238. }
  5239. if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) {
  5240. file->phpnamespace = strviewdup(
  5241. ctx, google_protobuf_FileOptions_php_namespace(file_options_proto));
  5242. }
  5243. }
  5244. /* Verify dependencies. */
  5245. strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
  5246. file->deps = symtab_alloc(ctx, sizeof(*file->deps) * n);
  5247. for (i = 0; i < n; i++) {
  5248. upb_strview dep_name = strs[i];
  5249. upb_value v;
  5250. if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data,
  5251. dep_name.size, &v)) {
  5252. symtab_errf(ctx,
  5253. "Depends on file '" UPB_STRVIEW_FORMAT
  5254. "', but it has not been loaded",
  5255. UPB_STRVIEW_ARGS(dep_name));
  5256. }
  5257. file->deps[i] = upb_value_getconstptr(v);
  5258. }
  5259. /* Create messages. */
  5260. msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
  5261. for (i = 0; i < n; i++) {
  5262. create_msgdef(ctx, file->package, msgs[i]);
  5263. }
  5264. /* Create enums. */
  5265. enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
  5266. for (i = 0; i < n; i++) {
  5267. create_enumdef(ctx, file->package, enums[i]);
  5268. }
  5269. /* Create extensions. */
  5270. exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
  5271. file->exts = symtab_alloc(ctx, sizeof(*file->exts) * n);
  5272. for (i = 0; i < n; i++) {
  5273. create_fielddef(ctx, file->package, NULL, exts[i]);
  5274. }
  5275. /* Now that all names are in the table, build layouts and resolve refs. */
  5276. for (i = 0; i < (size_t)file->ext_count; i++) {
  5277. resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]);
  5278. }
  5279. for (i = 0; i < (size_t)file->msg_count; i++) {
  5280. const upb_msgdef *m = &file->msgs[i];
  5281. int j;
  5282. for (j = 0; j < m->field_count; j++) {
  5283. resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]);
  5284. }
  5285. }
  5286. if (!ctx->layouts) {
  5287. for (i = 0; i < (size_t)file->msg_count; i++) {
  5288. const upb_msgdef *m = &file->msgs[i];
  5289. make_layout(ctx, m);
  5290. }
  5291. }
  5292. }
  5293. static void remove_filedef(upb_symtab *s, upb_filedef *file) {
  5294. upb_alloc *alloc = upb_arena_alloc(s->arena);
  5295. int i;
  5296. for (i = 0; i < file->msg_count; i++) {
  5297. const char *name = file->msgs[i].full_name;
  5298. upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc);
  5299. }
  5300. for (i = 0; i < file->enum_count; i++) {
  5301. const char *name = file->enums[i].full_name;
  5302. upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc);
  5303. }
  5304. for (i = 0; i < file->ext_count; i++) {
  5305. const char *name = file->exts[i].full_name;
  5306. upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc);
  5307. }
  5308. }
  5309. static const upb_filedef *_upb_symtab_addfile(
  5310. upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
  5311. const upb_msglayout **layouts, upb_status *status) {
  5312. upb_arena *file_arena = upb_arena_new();
  5313. upb_filedef *file;
  5314. symtab_addctx ctx;
  5315. if (!file_arena) return NULL;
  5316. file = upb_arena_malloc(file_arena, sizeof(*file));
  5317. if (!file) goto done;
  5318. ctx.file = file;
  5319. ctx.symtab = s;
  5320. ctx.file_arena = file_arena;
  5321. ctx.alloc = upb_arena_alloc(file_arena);
  5322. ctx.layouts = layouts;
  5323. ctx.status = status;
  5324. file->msg_count = 0;
  5325. file->enum_count = 0;
  5326. file->ext_count = 0;
  5327. file->symtab = s;
  5328. if (UPB_UNLIKELY(UPB_SETJMP(ctx.err))) {
  5329. UPB_ASSERT(!upb_ok(status));
  5330. remove_filedef(s, file);
  5331. file = NULL;
  5332. } else {
  5333. build_filedef(&ctx, file, file_proto);
  5334. upb_strtable_insert3(&s->files, file->name, strlen(file->name),
  5335. upb_value_constptr(file), ctx.alloc);
  5336. UPB_ASSERT(upb_ok(status));
  5337. upb_arena_fuse(s->arena, file_arena);
  5338. }
  5339. done:
  5340. upb_arena_free(file_arena);
  5341. return file;
  5342. }
  5343. const upb_filedef *upb_symtab_addfile(
  5344. upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
  5345. upb_status *status) {
  5346. return _upb_symtab_addfile(s, file_proto, NULL, status);
  5347. }
  5348. /* Include here since we want most of this file to be stdio-free. */
  5349. #include <stdio.h>
  5350. bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) {
  5351. /* Since this function should never fail (it would indicate a bug in upb) we
  5352. * print errors to stderr instead of returning error status to the user. */
  5353. upb_def_init **deps = init->deps;
  5354. google_protobuf_FileDescriptorProto *file;
  5355. upb_arena *arena;
  5356. upb_status status;
  5357. upb_status_clear(&status);
  5358. if (upb_strtable_lookup(&s->files, init->filename, NULL)) {
  5359. return true;
  5360. }
  5361. arena = upb_arena_new();
  5362. for (; *deps; deps++) {
  5363. if (!_upb_symtab_loaddefinit(s, *deps)) goto err;
  5364. }
  5365. file = google_protobuf_FileDescriptorProto_parse_ex(
  5366. init->descriptor.data, init->descriptor.size, arena, UPB_DECODE_ALIAS);
  5367. s->bytes_loaded += init->descriptor.size;
  5368. if (!file) {
  5369. upb_status_seterrf(
  5370. &status,
  5371. "Failed to parse compiled-in descriptor for file '%s'. This should "
  5372. "never happen.",
  5373. init->filename);
  5374. goto err;
  5375. }
  5376. if (!_upb_symtab_addfile(s, file, init->layouts, &status)) goto err;
  5377. upb_arena_free(arena);
  5378. return true;
  5379. err:
  5380. fprintf(stderr, "Error loading compiled-in descriptor: %s\n",
  5381. upb_status_errmsg(&status));
  5382. upb_arena_free(arena);
  5383. return false;
  5384. }
  5385. size_t _upb_symtab_bytesloaded(const upb_symtab *s) {
  5386. return s->bytes_loaded;
  5387. }
  5388. upb_arena *_upb_symtab_arena(const upb_symtab *s) {
  5389. return s->arena;
  5390. }
  5391. #undef CHK_OOM
  5392. #include <string.h>
  5393. static size_t get_field_size(const upb_msglayout_field *f) {
  5394. static unsigned char sizes[] = {
  5395. 0,/* 0 */
  5396. 8, /* UPB_DESCRIPTOR_TYPE_DOUBLE */
  5397. 4, /* UPB_DESCRIPTOR_TYPE_FLOAT */
  5398. 8, /* UPB_DESCRIPTOR_TYPE_INT64 */
  5399. 8, /* UPB_DESCRIPTOR_TYPE_UINT64 */
  5400. 4, /* UPB_DESCRIPTOR_TYPE_INT32 */
  5401. 8, /* UPB_DESCRIPTOR_TYPE_FIXED64 */
  5402. 4, /* UPB_DESCRIPTOR_TYPE_FIXED32 */
  5403. 1, /* UPB_DESCRIPTOR_TYPE_BOOL */
  5404. sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_STRING */
  5405. sizeof(void*), /* UPB_DESCRIPTOR_TYPE_GROUP */
  5406. sizeof(void*), /* UPB_DESCRIPTOR_TYPE_MESSAGE */
  5407. sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_BYTES */
  5408. 4, /* UPB_DESCRIPTOR_TYPE_UINT32 */
  5409. 4, /* UPB_DESCRIPTOR_TYPE_ENUM */
  5410. 4, /* UPB_DESCRIPTOR_TYPE_SFIXED32 */
  5411. 8, /* UPB_DESCRIPTOR_TYPE_SFIXED64 */
  5412. 4, /* UPB_DESCRIPTOR_TYPE_SINT32 */
  5413. 8, /* UPB_DESCRIPTOR_TYPE_SINT64 */
  5414. };
  5415. return _upb_repeated_or_map(f) ? sizeof(void *) : sizes[f->descriptortype];
  5416. }
  /* Size value passed to _upb_map_new() for a map key or value of each
   * upb_fieldtype_t (see upb_map_new()). Strings/bytes are special-cased in
   * maps and use 0 here. The table is read-only, like
   * _upb_fieldtype_to_sizelg2. */
  static const char _upb_fieldtype_to_mapsize[12] = {
    0,
    1, /* UPB_TYPE_BOOL */
    4, /* UPB_TYPE_FLOAT */
    4, /* UPB_TYPE_INT32 */
    4, /* UPB_TYPE_UINT32 */
    4, /* UPB_TYPE_ENUM */
    sizeof(void*), /* UPB_TYPE_MESSAGE */
    8, /* UPB_TYPE_DOUBLE */
    8, /* UPB_TYPE_INT64 */
    8, /* UPB_TYPE_UINT64 */
    0, /* UPB_TYPE_STRING */
    0, /* UPB_TYPE_BYTES */
  };
  5432. static const char _upb_fieldtype_to_sizelg2[12] = {
  5433. 0,
  5434. 0, /* UPB_TYPE_BOOL */
  5435. 2, /* UPB_TYPE_FLOAT */
  5436. 2, /* UPB_TYPE_INT32 */
  5437. 2, /* UPB_TYPE_UINT32 */
  5438. 2, /* UPB_TYPE_ENUM */
  5439. UPB_SIZE(2, 3), /* UPB_TYPE_MESSAGE */
  5440. 3, /* UPB_TYPE_DOUBLE */
  5441. 3, /* UPB_TYPE_INT64 */
  5442. 3, /* UPB_TYPE_UINT64 */
  5443. UPB_SIZE(3, 4), /* UPB_TYPE_STRING */
  5444. UPB_SIZE(3, 4), /* UPB_TYPE_BYTES */
  5445. };
  5446. /** upb_msg *******************************************************************/
  5447. upb_msg *upb_msg_new(const upb_msgdef *m, upb_arena *a) {
  5448. return _upb_msg_new(upb_msgdef_layout(m), a);
  5449. }
  5450. static bool in_oneof(const upb_msglayout_field *field) {
  5451. return field->presence < 0;
  5452. }
  5453. static upb_msgval _upb_msg_getraw(const upb_msg *msg, const upb_fielddef *f) {
  5454. const upb_msglayout_field *field = upb_fielddef_layout(f);
  5455. const char *mem = UPB_PTR_AT(msg, field->offset, char);
  5456. upb_msgval val = {0};
  5457. memcpy(&val, mem, get_field_size(field));
  5458. return val;
  5459. }
  5460. bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f) {
  5461. const upb_msglayout_field *field = upb_fielddef_layout(f);
  5462. if (in_oneof(field)) {
  5463. return _upb_getoneofcase_field(msg, field) == field->number;
  5464. } else if (field->presence > 0) {
  5465. return _upb_hasbit_field(msg, field);
  5466. } else {
  5467. UPB_ASSERT(field->descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
  5468. field->descriptortype == UPB_DESCRIPTOR_TYPE_GROUP);
  5469. return _upb_msg_getraw(msg, f).msg_val != NULL;
  5470. }
  5471. }
  5472. const upb_fielddef *upb_msg_whichoneof(const upb_msg *msg,
  5473. const upb_oneofdef *o) {
  5474. const upb_fielddef *f = upb_oneofdef_field(o, 0);
  5475. if (upb_oneofdef_issynthetic(o)) {
  5476. UPB_ASSERT(upb_oneofdef_fieldcount(o) == 1);
  5477. return upb_msg_has(msg, f) ? f : NULL;
  5478. } else {
  5479. const upb_msglayout_field *field = upb_fielddef_layout(f);
  5480. uint32_t oneof_case = _upb_getoneofcase_field(msg, field);
  5481. f = oneof_case ? upb_oneofdef_itof(o, oneof_case) : NULL;
  5482. UPB_ASSERT((f != NULL) == (oneof_case != 0));
  5483. return f;
  5484. }
  5485. }
  5486. upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) {
  5487. if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) {
  5488. return _upb_msg_getraw(msg, f);
  5489. } else {
  5490. /* TODO(haberman): change upb_fielddef to not require this switch(). */
  5491. upb_msgval val = {0};
  5492. switch (upb_fielddef_type(f)) {
  5493. case UPB_TYPE_INT32:
  5494. case UPB_TYPE_ENUM:
  5495. val.int32_val = upb_fielddef_defaultint32(f);
  5496. break;
  5497. case UPB_TYPE_INT64:
  5498. val.int64_val = upb_fielddef_defaultint64(f);
  5499. break;
  5500. case UPB_TYPE_UINT32:
  5501. val.uint32_val = upb_fielddef_defaultuint32(f);
  5502. break;
  5503. case UPB_TYPE_UINT64:
  5504. val.uint64_val = upb_fielddef_defaultuint64(f);
  5505. break;
  5506. case UPB_TYPE_FLOAT:
  5507. val.float_val = upb_fielddef_defaultfloat(f);
  5508. break;
  5509. case UPB_TYPE_DOUBLE:
  5510. val.double_val = upb_fielddef_defaultdouble(f);
  5511. break;
  5512. case UPB_TYPE_BOOL:
  5513. val.bool_val = upb_fielddef_defaultbool(f);
  5514. break;
  5515. case UPB_TYPE_STRING:
  5516. case UPB_TYPE_BYTES:
  5517. val.str_val.data = upb_fielddef_defaultstr(f, &val.str_val.size);
  5518. break;
  5519. case UPB_TYPE_MESSAGE:
  5520. val.msg_val = NULL;
  5521. break;
  5522. }
  5523. return val;
  5524. }
  5525. }
  5526. upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f,
  5527. upb_arena *a) {
  5528. const upb_msglayout_field *field = upb_fielddef_layout(f);
  5529. upb_mutmsgval ret;
  5530. char *mem = UPB_PTR_AT(msg, field->offset, char);
  5531. bool wrong_oneof =
  5532. in_oneof(field) && _upb_getoneofcase_field(msg, field) != field->number;
  5533. memcpy(&ret, mem, sizeof(void*));
  5534. if (a && (!ret.msg || wrong_oneof)) {
  5535. if (upb_fielddef_ismap(f)) {
  5536. const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
  5537. const upb_fielddef *key = upb_msgdef_itof(entry, UPB_MAPENTRY_KEY);
  5538. const upb_fielddef *value = upb_msgdef_itof(entry, UPB_MAPENTRY_VALUE);
  5539. ret.map = upb_map_new(a, upb_fielddef_type(key), upb_fielddef_type(value));
  5540. } else if (upb_fielddef_isseq(f)) {
  5541. ret.array = upb_array_new(a, upb_fielddef_type(f));
  5542. } else {
  5543. UPB_ASSERT(upb_fielddef_issubmsg(f));
  5544. ret.msg = upb_msg_new(upb_fielddef_msgsubdef(f), a);
  5545. }
  5546. memcpy(mem, &ret, sizeof(void*));
  5547. if (wrong_oneof) {
  5548. *_upb_oneofcase_field(msg, field) = field->number;
  5549. } else if (field->presence > 0) {
  5550. _upb_sethas_field(msg, field);
  5551. }
  5552. }
  5553. return ret;
  5554. }
  5555. void upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val,
  5556. upb_arena *a) {
  5557. const upb_msglayout_field *field = upb_fielddef_layout(f);
  5558. char *mem = UPB_PTR_AT(msg, field->offset, char);
  5559. UPB_UNUSED(a); /* We reserve the right to make set insert into a map. */
  5560. memcpy(mem, &val, get_field_size(field));
  5561. if (field->presence > 0) {
  5562. _upb_sethas_field(msg, field);
  5563. } else if (in_oneof(field)) {
  5564. *_upb_oneofcase_field(msg, field) = field->number;
  5565. }
  5566. }
  5567. void upb_msg_clearfield(upb_msg *msg, const upb_fielddef *f) {
  5568. const upb_msglayout_field *field = upb_fielddef_layout(f);
  5569. char *mem = UPB_PTR_AT(msg, field->offset, char);
  5570. if (field->presence > 0) {
  5571. _upb_clearhas_field(msg, field);
  5572. } else if (in_oneof(field)) {
  5573. uint32_t *oneof_case = _upb_oneofcase_field(msg, field);
  5574. if (*oneof_case != field->number) return;
  5575. *oneof_case = 0;
  5576. }
  5577. memset(mem, 0, get_field_size(field));
  5578. }
  5579. void upb_msg_clear(upb_msg *msg, const upb_msgdef *m) {
  5580. _upb_msg_clear(msg, upb_msgdef_layout(m));
  5581. }
  5582. bool upb_msg_next(const upb_msg *msg, const upb_msgdef *m,
  5583. const upb_symtab *ext_pool, const upb_fielddef **out_f,
  5584. upb_msgval *out_val, size_t *iter) {
  5585. int i = *iter;
  5586. int n = upb_msgdef_fieldcount(m);
  5587. const upb_msgval zero = {0};
  5588. UPB_UNUSED(ext_pool);
  5589. while (++i < n) {
  5590. const upb_fielddef *f = upb_msgdef_field(m, i);
  5591. upb_msgval val = _upb_msg_getraw(msg, f);
  5592. /* Skip field if unset or empty. */
  5593. if (upb_fielddef_haspresence(f)) {
  5594. if (!upb_msg_has(msg, f)) continue;
  5595. } else {
  5596. upb_msgval test = val;
  5597. if (upb_fielddef_isstring(f) && !upb_fielddef_isseq(f)) {
  5598. /* Clear string pointer, only size matters (ptr could be non-NULL). */
  5599. test.str_val.data = NULL;
  5600. }
  5601. /* Continue if NULL or 0. */
  5602. if (memcmp(&test, &zero, sizeof(test)) == 0) continue;
  5603. /* Continue on empty array or map. */
  5604. if (upb_fielddef_ismap(f)) {
  5605. if (upb_map_size(test.map_val) == 0) continue;
  5606. } else if (upb_fielddef_isseq(f)) {
  5607. if (upb_array_size(test.array_val) == 0) continue;
  5608. }
  5609. }
  5610. *out_val = val;
  5611. *out_f = f;
  5612. *iter = i;
  5613. return true;
  5614. }
  5615. *iter = i;
  5616. return false;
  5617. }
  5618. bool _upb_msg_discardunknown(upb_msg *msg, const upb_msgdef *m, int depth) {
  5619. size_t iter = UPB_MSG_BEGIN;
  5620. const upb_fielddef *f;
  5621. upb_msgval val;
  5622. bool ret = true;
  5623. if (--depth == 0) return false;
  5624. _upb_msg_discardunknown_shallow(msg);
  5625. while (upb_msg_next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) {
  5626. const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
  5627. if (!subm) continue;
  5628. if (upb_fielddef_ismap(f)) {
  5629. const upb_fielddef *val_f = upb_msgdef_itof(subm, 2);
  5630. const upb_msgdef *val_m = upb_fielddef_msgsubdef(val_f);
  5631. upb_map *map = (upb_map*)val.map_val;
  5632. size_t iter = UPB_MAP_BEGIN;
  5633. if (!val_m) continue;
  5634. while (upb_mapiter_next(map, &iter)) {
  5635. upb_msgval map_val = upb_mapiter_value(map, iter);
  5636. if (!_upb_msg_discardunknown((upb_msg*)map_val.msg_val, val_m, depth)) {
  5637. ret = false;
  5638. }
  5639. }
  5640. } else if (upb_fielddef_isseq(f)) {
  5641. const upb_array *arr = val.array_val;
  5642. size_t i, n = upb_array_size(arr);
  5643. for (i = 0; i < n; i++) {
  5644. upb_msgval elem = upb_array_get(arr, i);
  5645. if (!_upb_msg_discardunknown((upb_msg*)elem.msg_val, subm, depth)) {
  5646. ret = false;
  5647. }
  5648. }
  5649. } else {
  5650. if (!_upb_msg_discardunknown((upb_msg*)val.msg_val, subm, depth)) {
  5651. ret = false;
  5652. }
  5653. }
  5654. }
  5655. return ret;
  5656. }
  5657. bool upb_msg_discardunknown(upb_msg *msg, const upb_msgdef *m, int maxdepth) {
  5658. return _upb_msg_discardunknown(msg, m, maxdepth);
  5659. }
  5660. /** upb_array *****************************************************************/
  5661. upb_array *upb_array_new(upb_arena *a, upb_fieldtype_t type) {
  5662. return _upb_array_new(a, 4, _upb_fieldtype_to_sizelg2[type]);
  5663. }
  5664. size_t upb_array_size(const upb_array *arr) {
  5665. return arr->len;
  5666. }
  5667. upb_msgval upb_array_get(const upb_array *arr, size_t i) {
  5668. upb_msgval ret;
  5669. const char* data = _upb_array_constptr(arr);
  5670. int lg2 = arr->data & 7;
  5671. UPB_ASSERT(i < arr->len);
  5672. memcpy(&ret, data + (i << lg2), 1 << lg2);
  5673. return ret;
  5674. }
  5675. void upb_array_set(upb_array *arr, size_t i, upb_msgval val) {
  5676. char* data = _upb_array_ptr(arr);
  5677. int lg2 = arr->data & 7;
  5678. UPB_ASSERT(i < arr->len);
  5679. memcpy(data + (i << lg2), &val, 1 << lg2);
  5680. }
  5681. bool upb_array_append(upb_array *arr, upb_msgval val, upb_arena *arena) {
  5682. if (!_upb_array_realloc(arr, arr->len + 1, arena)) {
  5683. return false;
  5684. }
  5685. arr->len++;
  5686. upb_array_set(arr, arr->len - 1, val);
  5687. return true;
  5688. }
  5689. bool upb_array_resize(upb_array *arr, size_t size, upb_arena *arena) {
  5690. return _upb_array_resize(arr, size, arena);
  5691. }
  5692. /** upb_map *******************************************************************/
  5693. upb_map *upb_map_new(upb_arena *a, upb_fieldtype_t key_type,
  5694. upb_fieldtype_t value_type) {
  5695. return _upb_map_new(a, _upb_fieldtype_to_mapsize[key_type],
  5696. _upb_fieldtype_to_mapsize[value_type]);
  5697. }
  5698. size_t upb_map_size(const upb_map *map) {
  5699. return _upb_map_size(map);
  5700. }
  5701. bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) {
  5702. return _upb_map_get(map, &key, map->key_size, val, map->val_size);
  5703. }
  5704. void upb_map_clear(upb_map *map) {
  5705. _upb_map_clear(map);
  5706. }
  5707. bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val,
  5708. upb_arena *arena) {
  5709. return _upb_map_set(map, &key, map->key_size, &val, map->val_size, arena);
  5710. }
  5711. bool upb_map_delete(upb_map *map, upb_msgval key) {
  5712. return _upb_map_delete(map, &key, map->key_size);
  5713. }
  5714. bool upb_mapiter_next(const upb_map *map, size_t *iter) {
  5715. return _upb_map_next(map, iter);
  5716. }
  5717. bool upb_mapiter_done(const upb_map *map, size_t iter) {
  5718. upb_strtable_iter i;
  5719. UPB_ASSERT(iter != UPB_MAP_BEGIN);
  5720. i.t = &map->table;
  5721. i.index = iter;
  5722. return upb_strtable_done(&i);
  5723. }
  5724. /* Returns the key and value for this entry of the map. */
  5725. upb_msgval upb_mapiter_key(const upb_map *map, size_t iter) {
  5726. upb_strtable_iter i;
  5727. upb_msgval ret;
  5728. i.t = &map->table;
  5729. i.index = iter;
  5730. _upb_map_fromkey(upb_strtable_iter_key(&i), &ret, map->key_size);
  5731. return ret;
  5732. }
  5733. upb_msgval upb_mapiter_value(const upb_map *map, size_t iter) {
  5734. upb_strtable_iter i;
  5735. upb_msgval ret;
  5736. i.t = &map->table;
  5737. i.index = iter;
  5738. _upb_map_fromvalue(upb_strtable_iter_value(&i), &ret, map->val_size);
  5739. return ret;
  5740. }
  5741. /* void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); */
  5742. #include <errno.h>
  5743. #include <float.h>
  5744. #include <inttypes.h>
  5745. #include <limits.h>
  5746. #include <math.h>
  5747. #include <setjmp.h>
  5748. #include <stdlib.h>
  5749. #include <string.h>
  5750. /* Special header, must be included last. */
  5751. typedef struct {
  5752. const char *ptr, *end;
  5753. upb_arena *arena; /* TODO: should we have a tmp arena for tmp data? */
  5754. const upb_symtab *any_pool;
  5755. int depth;
  5756. upb_status *status;
  5757. jmp_buf err;
  5758. int line;
  5759. const char *line_begin;
  5760. bool is_first;
  5761. int options;
  5762. const upb_fielddef *debug_field;
  5763. } jsondec;
  /* Kinds of JSON value, as classified from the first character by
   * jsondec_rawpeek(). */
  enum {
    JD_OBJECT, /* '{' */
    JD_ARRAY,  /* '[' */
    JD_STRING, /* '"' */
    JD_NUMBER, /* '-' or a digit */
    JD_TRUE,   /* 't' */
    JD_FALSE,  /* 'f' */
    JD_NULL    /* 'n' */
  };
  5765. /* Forward declarations of mutually-recursive functions. */
  5766. static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m);
  5767. static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f);
  5768. static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg,
  5769. const upb_msgdef *m);
  5770. static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m);
  5771. static bool jsondec_streql(upb_strview str, const char *lit) {
  5772. return str.size == strlen(lit) && memcmp(str.data, lit, str.size) == 0;
  5773. }
  5774. static bool jsondec_isnullvalue(const upb_fielddef *f) {
  5775. return upb_fielddef_type(f) == UPB_TYPE_ENUM &&
  5776. strcmp(upb_enumdef_fullname(upb_fielddef_enumsubdef(f)),
  5777. "google.protobuf.NullValue") == 0;
  5778. }
  5779. static bool jsondec_isvalue(const upb_fielddef *f) {
  5780. return (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
  5781. upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) ==
  5782. UPB_WELLKNOWN_VALUE) ||
  5783. jsondec_isnullvalue(f);
  5784. }
  5785. UPB_NORETURN static void jsondec_err(jsondec *d, const char *msg) {
  5786. upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: %s", d->line,
  5787. (int)(d->ptr - d->line_begin), msg);
  5788. UPB_LONGJMP(d->err, 1);
  5789. }
  5790. UPB_PRINTF(2, 3)
  5791. UPB_NORETURN static void jsondec_errf(jsondec *d, const char *fmt, ...) {
  5792. va_list argp;
  5793. upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: ", d->line,
  5794. (int)(d->ptr - d->line_begin));
  5795. va_start(argp, fmt);
  5796. upb_status_vappenderrf(d->status, fmt, argp);
  5797. va_end(argp);
  5798. UPB_LONGJMP(d->err, 1);
  5799. }
  5800. static void jsondec_skipws(jsondec *d) {
  5801. while (d->ptr != d->end) {
  5802. switch (*d->ptr) {
  5803. case '\n':
  5804. d->line++;
  5805. d->line_begin = d->ptr;
  5806. /* Fallthrough. */
  5807. case '\r':
  5808. case '\t':
  5809. case ' ':
  5810. d->ptr++;
  5811. break;
  5812. default:
  5813. return;
  5814. }
  5815. }
  5816. jsondec_err(d, "Unexpected EOF");
  5817. }
  5818. static bool jsondec_tryparsech(jsondec *d, char ch) {
  5819. if (d->ptr == d->end || *d->ptr != ch) return false;
  5820. d->ptr++;
  5821. return true;
  5822. }
  5823. static void jsondec_parselit(jsondec *d, const char *lit) {
  5824. size_t avail = d->end - d->ptr;
  5825. size_t len = strlen(lit);
  5826. if (avail < len || memcmp(d->ptr, lit, len) != 0) {
  5827. jsondec_errf(d, "Expected: '%s'", lit);
  5828. }
  5829. d->ptr += len;
  5830. }
  5831. static void jsondec_wsch(jsondec *d, char ch) {
  5832. jsondec_skipws(d);
  5833. if (!jsondec_tryparsech(d, ch)) {
  5834. jsondec_errf(d, "Expected: '%c'", ch);
  5835. }
  5836. }
  5837. static void jsondec_true(jsondec *d) { jsondec_parselit(d, "true"); }
  5838. static void jsondec_false(jsondec *d) { jsondec_parselit(d, "false"); }
  5839. static void jsondec_null(jsondec *d) { jsondec_parselit(d, "null"); }
  5840. static void jsondec_entrysep(jsondec *d) {
  5841. jsondec_skipws(d);
  5842. jsondec_parselit(d, ":");
  5843. }
  5844. static int jsondec_rawpeek(jsondec *d) {
  5845. switch (*d->ptr) {
  5846. case '{':
  5847. return JD_OBJECT;
  5848. case '[':
  5849. return JD_ARRAY;
  5850. case '"':
  5851. return JD_STRING;
  5852. case '-':
  5853. case '0':
  5854. case '1':
  5855. case '2':
  5856. case '3':
  5857. case '4':
  5858. case '5':
  5859. case '6':
  5860. case '7':
  5861. case '8':
  5862. case '9':
  5863. return JD_NUMBER;
  5864. case 't':
  5865. return JD_TRUE;
  5866. case 'f':
  5867. return JD_FALSE;
  5868. case 'n':
  5869. return JD_NULL;
  5870. default:
  5871. jsondec_errf(d, "Unexpected character: '%c'", *d->ptr);
  5872. }
  5873. }
  5874. /* JSON object/array **********************************************************/
  5875. /* These are used like so:
  5876. *
  5877. * jsondec_objstart(d);
  5878. * while (jsondec_objnext(d)) {
  5879. * ...
  5880. * }
  5881. * jsondec_objend(d) */
  5882. static int jsondec_peek(jsondec *d) {
  5883. jsondec_skipws(d);
  5884. return jsondec_rawpeek(d);
  5885. }
  5886. static void jsondec_push(jsondec *d) {
  5887. if (--d->depth < 0) {
  5888. jsondec_err(d, "Recursion limit exceeded");
  5889. }
  5890. d->is_first = true;
  5891. }
  5892. static bool jsondec_seqnext(jsondec *d, char end_ch) {
  5893. bool is_first = d->is_first;
  5894. d->is_first = false;
  5895. jsondec_skipws(d);
  5896. if (*d->ptr == end_ch) return false;
  5897. if (!is_first) jsondec_parselit(d, ",");
  5898. return true;
  5899. }
  5900. static void jsondec_arrstart(jsondec *d) {
  5901. jsondec_push(d);
  5902. jsondec_wsch(d, '[');
  5903. }
  5904. static void jsondec_arrend(jsondec *d) {
  5905. d->depth++;
  5906. jsondec_wsch(d, ']');
  5907. }
  5908. static bool jsondec_arrnext(jsondec *d) {
  5909. return jsondec_seqnext(d, ']');
  5910. }
  5911. static void jsondec_objstart(jsondec *d) {
  5912. jsondec_push(d);
  5913. jsondec_wsch(d, '{');
  5914. }
  5915. static void jsondec_objend(jsondec *d) {
  5916. d->depth++;
  5917. jsondec_wsch(d, '}');
  5918. }
  5919. static bool jsondec_objnext(jsondec *d) {
  5920. if (!jsondec_seqnext(d, '}')) return false;
  5921. if (jsondec_peek(d) != JD_STRING) {
  5922. jsondec_err(d, "Object must start with string");
  5923. }
  5924. return true;
  5925. }
  5926. /* JSON number ****************************************************************/
  5927. static bool jsondec_tryskipdigits(jsondec *d) {
  5928. const char *start = d->ptr;
  5929. while (d->ptr < d->end) {
  5930. if (*d->ptr < '0' || *d->ptr > '9') {
  5931. break;
  5932. }
  5933. d->ptr++;
  5934. }
  5935. return d->ptr != start;
  5936. }
  5937. static void jsondec_skipdigits(jsondec *d) {
  5938. if (!jsondec_tryskipdigits(d)) {
  5939. jsondec_err(d, "Expected one or more digits");
  5940. }
  5941. }
  5942. static double jsondec_number(jsondec *d) {
  5943. const char *start = d->ptr;
  5944. assert(jsondec_rawpeek(d) == JD_NUMBER);
  5945. /* Skip over the syntax of a number, as specified by JSON. */
  5946. if (*d->ptr == '-') d->ptr++;
  5947. if (jsondec_tryparsech(d, '0')) {
  5948. if (jsondec_tryskipdigits(d)) {
  5949. jsondec_err(d, "number cannot have leading zero");
  5950. }
  5951. } else {
  5952. jsondec_skipdigits(d);
  5953. }
  5954. if (d->ptr == d->end) goto parse;
  5955. if (jsondec_tryparsech(d, '.')) {
  5956. jsondec_skipdigits(d);
  5957. }
  5958. if (d->ptr == d->end) goto parse;
  5959. if (*d->ptr == 'e' || *d->ptr == 'E') {
  5960. d->ptr++;
  5961. if (d->ptr == d->end) {
  5962. jsondec_err(d, "Unexpected EOF in number");
  5963. }
  5964. if (*d->ptr == '+' || *d->ptr == '-') {
  5965. d->ptr++;
  5966. }
  5967. jsondec_skipdigits(d);
  5968. }
  5969. parse:
  5970. /* Having verified the syntax of a JSON number, use strtod() to parse
  5971. * (strtod() accepts a superset of JSON syntax). */
  5972. errno = 0;
  5973. {
  5974. char* end;
  5975. double val = strtod(start, &end);
  5976. assert(end == d->ptr);
  5977. /* Currently the min/max-val conformance tests fail if we check this. Does
  5978. * this mean the conformance tests are wrong or strtod() is wrong, or
  5979. * something else? Investigate further. */
  5980. /*
  5981. if (errno == ERANGE) {
  5982. jsondec_err(d, "Number out of range");
  5983. }
  5984. */
  5985. if (val > DBL_MAX || val < -DBL_MAX) {
  5986. jsondec_err(d, "Number out of range");
  5987. }
  5988. return val;
  5989. }
  5990. }
  5991. /* JSON string ****************************************************************/
  5992. static char jsondec_escape(jsondec *d) {
  5993. switch (*d->ptr++) {
  5994. case '"':
  5995. return '\"';
  5996. case '\\':
  5997. return '\\';
  5998. case '/':
  5999. return '/';
  6000. case 'b':
  6001. return '\b';
  6002. case 'f':
  6003. return '\f';
  6004. case 'n':
  6005. return '\n';
  6006. case 'r':
  6007. return '\r';
  6008. case 't':
  6009. return '\t';
  6010. default:
  6011. jsondec_err(d, "Invalid escape char");
  6012. }
  6013. }
  6014. static uint32_t jsondec_codepoint(jsondec *d) {
  6015. uint32_t cp = 0;
  6016. const char *end;
  6017. if (d->end - d->ptr < 4) {
  6018. jsondec_err(d, "EOF inside string");
  6019. }
  6020. end = d->ptr + 4;
  6021. while (d->ptr < end) {
  6022. char ch = *d->ptr++;
  6023. if (ch >= '0' && ch <= '9') {
  6024. ch -= '0';
  6025. } else if (ch >= 'a' && ch <= 'f') {
  6026. ch = ch - 'a' + 10;
  6027. } else if (ch >= 'A' && ch <= 'F') {
  6028. ch = ch - 'A' + 10;
  6029. } else {
  6030. jsondec_err(d, "Invalid hex digit");
  6031. }
  6032. cp = (cp << 4) | ch;
  6033. }
  6034. return cp;
  6035. }
  6036. /* Parses a \uXXXX unicode escape (possibly a surrogate pair). */
  6037. static size_t jsondec_unicode(jsondec *d, char* out) {
  6038. uint32_t cp = jsondec_codepoint(d);
  6039. if (cp >= 0xd800 && cp <= 0xdbff) {
  6040. /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */
  6041. uint32_t high = cp;
  6042. uint32_t low;
  6043. jsondec_parselit(d, "\\u");
  6044. low = jsondec_codepoint(d);
  6045. if (low < 0xdc00 || low > 0xdfff) {
  6046. jsondec_err(d, "Invalid low surrogate");
  6047. }
  6048. cp = (high & 0x3ff) << 10;
  6049. cp |= (low & 0x3ff);
  6050. cp += 0x10000;
  6051. } else if (cp >= 0xdc00 && cp <= 0xdfff) {
  6052. jsondec_err(d, "Unpaired low surrogate");
  6053. }
  6054. /* Write to UTF-8 */
  6055. if (cp <= 0x7f) {
  6056. out[0] = cp;
  6057. return 1;
  6058. } else if (cp <= 0x07FF) {
  6059. out[0] = ((cp >> 6) & 0x1F) | 0xC0;
  6060. out[1] = ((cp >> 0) & 0x3F) | 0x80;
  6061. return 2;
  6062. } else if (cp <= 0xFFFF) {
  6063. out[0] = ((cp >> 12) & 0x0F) | 0xE0;
  6064. out[1] = ((cp >> 6) & 0x3F) | 0x80;
  6065. out[2] = ((cp >> 0) & 0x3F) | 0x80;
  6066. return 3;
  6067. } else if (cp < 0x10FFFF) {
  6068. out[0] = ((cp >> 18) & 0x07) | 0xF0;
  6069. out[1] = ((cp >> 12) & 0x3f) | 0x80;
  6070. out[2] = ((cp >> 6) & 0x3f) | 0x80;
  6071. out[3] = ((cp >> 0) & 0x3f) | 0x80;
  6072. return 4;
  6073. } else {
  6074. jsondec_err(d, "Invalid codepoint");
  6075. }
  6076. }
  6077. static void jsondec_resize(jsondec *d, char **buf, char **end, char **buf_end) {
  6078. size_t oldsize = *buf_end - *buf;
  6079. size_t len = *end - *buf;
  6080. size_t size = UPB_MAX(8, 2 * oldsize);
  6081. *buf = upb_arena_realloc(d->arena, *buf, len, size);
  6082. if (!*buf) jsondec_err(d, "Out of memory");
  6083. *end = *buf + len;
  6084. *buf_end = *buf + size;
  6085. }
  6086. static upb_strview jsondec_string(jsondec *d) {
  6087. char *buf = NULL;
  6088. char *end = NULL;
  6089. char *buf_end = NULL;
  6090. jsondec_skipws(d);
  6091. if (*d->ptr++ != '"') {
  6092. jsondec_err(d, "Expected string");
  6093. }
  6094. while (d->ptr < d->end) {
  6095. char ch = *d->ptr++;
  6096. if (end == buf_end) {
  6097. jsondec_resize(d, &buf, &end, &buf_end);
  6098. }
  6099. switch (ch) {
  6100. case '"': {
  6101. upb_strview ret;
  6102. ret.data = buf;
  6103. ret.size = end - buf;
  6104. *end = '\0'; /* Needed for possible strtod(). */
  6105. return ret;
  6106. }
  6107. case '\\':
  6108. if (d->ptr == d->end) goto eof;
  6109. if (*d->ptr == 'u') {
  6110. d->ptr++;
  6111. if (buf_end - end < 4) {
  6112. /* Allow space for maximum-sized code point (4 bytes). */
  6113. jsondec_resize(d, &buf, &end, &buf_end);
  6114. }
  6115. end += jsondec_unicode(d, end);
  6116. } else {
  6117. *end++ = jsondec_escape(d);
  6118. }
  6119. break;
  6120. default:
  6121. if ((unsigned char)*d->ptr < 0x20) {
  6122. jsondec_err(d, "Invalid char in JSON string");
  6123. }
  6124. *end++ = ch;
  6125. break;
  6126. }
  6127. }
  6128. eof:
  6129. jsondec_err(d, "EOF inside string");
  6130. }
  6131. static void jsondec_skipval(jsondec *d) {
  6132. switch (jsondec_peek(d)) {
  6133. case JD_OBJECT:
  6134. jsondec_objstart(d);
  6135. while (jsondec_objnext(d)) {
  6136. jsondec_string(d);
  6137. jsondec_entrysep(d);
  6138. jsondec_skipval(d);
  6139. }
  6140. jsondec_objend(d);
  6141. break;
  6142. case JD_ARRAY:
  6143. jsondec_arrstart(d);
  6144. while (jsondec_arrnext(d)) {
  6145. jsondec_skipval(d);
  6146. }
  6147. jsondec_arrend(d);
  6148. break;
  6149. case JD_TRUE:
  6150. jsondec_true(d);
  6151. break;
  6152. case JD_FALSE:
  6153. jsondec_false(d);
  6154. break;
  6155. case JD_NULL:
  6156. jsondec_null(d);
  6157. break;
  6158. case JD_STRING:
  6159. jsondec_string(d);
  6160. break;
  6161. case JD_NUMBER:
  6162. jsondec_number(d);
  6163. break;
  6164. }
  6165. }
  6166. /* Base64 decoding for bytes fields. ******************************************/
  /* Maps one base64 character to its 6-bit value.
   *
   * Both the standard alphabet ('+', '/') and the URL-safe alphabet ('-', '_')
   * are accepted.  Every other byte, including '=', yields (unsigned)-1, so
   * the high bit of a combined value is set when any input byte was invalid.
   *
   * The index is taken as `unsigned char`, so bytes >= 0x80 stay inside the
   * 256-entry table even where plain `char` is signed. */
  static unsigned int jsondec_base64_tablelookup(const char ch) {
    /* Table includes the normal base64 chars plus the URL-safe variant.
     * One row per 16 byte values. */
    static const signed char table[256] = {
        /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
                   -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
                   -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1,
                   -1, -1, -1, 62 /*+*/, -1, 62 /*-*/, -1, 63 /*/ */,
        /* 0x30 */ 52 /*0*/, 53, 54, 55, 56, 57, 58, 59,
                   60, 61 /*9*/, -1, -1, -1, -1, -1, -1,
        /* 0x40 */ -1, 0 /*A*/, 1, 2, 3, 4, 5, 6,
                   7, 8, 9, 10, 11, 12, 13, 14 /*O*/,
        /* 0x50 */ 15 /*P*/, 16, 17, 18, 19, 20, 21, 22,
                   23, 24, 25 /*Z*/, -1, -1, -1, -1, 63 /*_*/,
        /* 0x60 */ -1, 26 /*a*/, 27, 28, 29, 30, 31, 32,
                   33, 34, 35, 36, 37, 38, 39, 40 /*o*/,
        /* 0x70 */ 41 /*p*/, 42, 43, 44, 45, 46, 47, 48,
                   49, 50, 51 /*z*/, -1, -1, -1, -1, -1,
        /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1,
                   -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1,
                   -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1,
                   -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1,
                   -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1,
                   -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1,
                   -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1,
                   -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1,
                   -1, -1, -1, -1, -1, -1, -1, -1};

    /* Sign-extend return value so high bit will be set on any unexpected
     * char. */
    return table[(unsigned char)ch];
  }
  6210. static char *jsondec_partialbase64(jsondec *d, const char *ptr, const char *end,
  6211. char *out) {
  6212. int32_t val = -1;
  6213. switch (end - ptr) {
  6214. case 2:
  6215. val = jsondec_base64_tablelookup(ptr[0]) << 18 |
  6216. jsondec_base64_tablelookup(ptr[1]) << 12;
  6217. out[0] = val >> 16;
  6218. out += 1;
  6219. break;
  6220. case 3:
  6221. val = jsondec_base64_tablelookup(ptr[0]) << 18 |
  6222. jsondec_base64_tablelookup(ptr[1]) << 12 |
  6223. jsondec_base64_tablelookup(ptr[2]) << 6;
  6224. out[0] = val >> 16;
  6225. out[1] = (val >> 8) & 0xff;
  6226. out += 2;
  6227. break;
  6228. }
  6229. if (val < 0) {
  6230. jsondec_err(d, "Corrupt base64");
  6231. }
  6232. return out;
  6233. }
  6234. static size_t jsondec_base64(jsondec *d, upb_strview str) {
  6235. /* We decode in place. This is safe because this is a new buffer (not
  6236. * aliasing the input) and because base64 decoding shrinks 4 bytes into 3. */
  6237. char *out = (char*)str.data;
  6238. const char *ptr = str.data;
  6239. const char *end = ptr + str.size;
  6240. const char *end4 = ptr + (str.size & -4); /* Round down to multiple of 4. */
  6241. for (; ptr < end4; ptr += 4, out += 3) {
  6242. int val = jsondec_base64_tablelookup(ptr[0]) << 18 |
  6243. jsondec_base64_tablelookup(ptr[1]) << 12 |
  6244. jsondec_base64_tablelookup(ptr[2]) << 6 |
  6245. jsondec_base64_tablelookup(ptr[3]) << 0;
  6246. if (val < 0) {
  6247. /* Junk chars or padding. Remove trailing padding, if any. */
  6248. if (end - ptr == 4 && ptr[3] == '=') {
  6249. if (ptr[2] == '=') {
  6250. end -= 2;
  6251. } else {
  6252. end -= 1;
  6253. }
  6254. }
  6255. break;
  6256. }
  6257. out[0] = val >> 16;
  6258. out[1] = (val >> 8) & 0xff;
  6259. out[2] = val & 0xff;
  6260. }
  6261. if (ptr < end) {
  6262. /* Process remaining chars. We do not require padding. */
  6263. out = jsondec_partialbase64(d, ptr, end, out);
  6264. }
  6265. return out - str.data;
  6266. }
  6267. /* Low-level integer parsing **************************************************/
  6268. /* We use these hand-written routines instead of strto[u]l() because the "long
  6269. * long" variants aren't in c89. Also our version allows setting a ptr limit. */
  6270. static const char *jsondec_buftouint64(jsondec *d, const char *ptr,
  6271. const char *end, uint64_t *val) {
  6272. uint64_t u64 = 0;
  6273. while (ptr < end) {
  6274. unsigned ch = *ptr - '0';
  6275. if (ch >= 10) break;
  6276. if (u64 > UINT64_MAX / 10 || u64 * 10 > UINT64_MAX - ch) {
  6277. jsondec_err(d, "Integer overflow");
  6278. }
  6279. u64 *= 10;
  6280. u64 += ch;
  6281. ptr++;
  6282. }
  6283. *val = u64;
  6284. return ptr;
  6285. }
  6286. static const char *jsondec_buftoint64(jsondec *d, const char *ptr,
  6287. const char *end, int64_t *val) {
  6288. bool neg = false;
  6289. uint64_t u64;
  6290. if (ptr != end && *ptr == '-') {
  6291. ptr++;
  6292. neg = true;
  6293. }
  6294. ptr = jsondec_buftouint64(d, ptr, end, &u64);
  6295. if (u64 > (uint64_t)INT64_MAX + neg) {
  6296. jsondec_err(d, "Integer overflow");
  6297. }
  6298. *val = neg ? -u64 : u64;
  6299. return ptr;
  6300. }
  6301. static uint64_t jsondec_strtouint64(jsondec *d, upb_strview str) {
  6302. const char *end = str.data + str.size;
  6303. uint64_t ret;
  6304. if (jsondec_buftouint64(d, str.data, end, &ret) != end) {
  6305. jsondec_err(d, "Non-number characters in quoted integer");
  6306. }
  6307. return ret;
  6308. }
  6309. static int64_t jsondec_strtoint64(jsondec *d, upb_strview str) {
  6310. const char *end = str.data + str.size;
  6311. int64_t ret;
  6312. if (jsondec_buftoint64(d, str.data, end, &ret) != end) {
  6313. jsondec_err(d, "Non-number characters in quoted integer");
  6314. }
  6315. return ret;
  6316. }
  6317. /* Primitive value types ******************************************************/
  6318. /* Parse INT32 or INT64 value. */
  6319. static upb_msgval jsondec_int(jsondec *d, const upb_fielddef *f) {
  6320. upb_msgval val;
  6321. switch (jsondec_peek(d)) {
  6322. case JD_NUMBER: {
  6323. double dbl = jsondec_number(d);
  6324. if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) {
  6325. jsondec_err(d, "JSON number is out of range.");
  6326. }
  6327. val.int64_val = dbl; /* must be guarded, overflow here is UB */
  6328. if (val.int64_val != dbl) {
  6329. jsondec_errf(d, "JSON number was not integral (%f != %" PRId64 ")", dbl,
  6330. val.int64_val);
  6331. }
  6332. break;
  6333. }
  6334. case JD_STRING: {
  6335. upb_strview str = jsondec_string(d);
  6336. val.int64_val = jsondec_strtoint64(d, str);
  6337. break;
  6338. }
  6339. default:
  6340. jsondec_err(d, "Expected number or string");
  6341. }
  6342. if (upb_fielddef_type(f) == UPB_TYPE_INT32) {
  6343. if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) {
  6344. jsondec_err(d, "Integer out of range.");
  6345. }
  6346. val.int32_val = (int32_t)val.int64_val;
  6347. }
  6348. return val;
  6349. }
  6350. /* Parse UINT32 or UINT64 value. */
  6351. static upb_msgval jsondec_uint(jsondec *d, const upb_fielddef *f) {
  6352. upb_msgval val = {0};
  6353. switch (jsondec_peek(d)) {
  6354. case JD_NUMBER: {
  6355. double dbl = jsondec_number(d);
  6356. if (dbl > 18446744073709549568.0 || dbl < 0) {
  6357. jsondec_err(d, "JSON number is out of range.");
  6358. }
  6359. val.uint64_val = dbl; /* must be guarded, overflow here is UB */
  6360. if (val.uint64_val != dbl) {
  6361. jsondec_errf(d, "JSON number was not integral (%f != %" PRIu64 ")", dbl,
  6362. val.uint64_val);
  6363. }
  6364. break;
  6365. }
  6366. case JD_STRING: {
  6367. upb_strview str = jsondec_string(d);
  6368. val.uint64_val = jsondec_strtouint64(d, str);
  6369. break;
  6370. }
  6371. default:
  6372. jsondec_err(d, "Expected number or string");
  6373. }
  6374. if (upb_fielddef_type(f) == UPB_TYPE_UINT32) {
  6375. if (val.uint64_val > UINT32_MAX) {
  6376. jsondec_err(d, "Integer out of range.");
  6377. }
  6378. val.uint32_val = (uint32_t)val.uint64_val;
  6379. }
  6380. return val;
  6381. }
  6382. /* Parse DOUBLE or FLOAT value. */
  6383. static upb_msgval jsondec_double(jsondec *d, const upb_fielddef *f) {
  6384. upb_strview str;
  6385. upb_msgval val = {0};
  6386. switch (jsondec_peek(d)) {
  6387. case JD_NUMBER:
  6388. val.double_val = jsondec_number(d);
  6389. break;
  6390. case JD_STRING:
  6391. str = jsondec_string(d);
  6392. if (jsondec_streql(str, "NaN")) {
  6393. val.double_val = NAN;
  6394. } else if (jsondec_streql(str, "Infinity")) {
  6395. val.double_val = INFINITY;
  6396. } else if (jsondec_streql(str, "-Infinity")) {
  6397. val.double_val = -INFINITY;
  6398. } else {
  6399. val.double_val = strtod(str.data, NULL);
  6400. }
  6401. break;
  6402. default:
  6403. jsondec_err(d, "Expected number or string");
  6404. }
  6405. if (upb_fielddef_type(f) == UPB_TYPE_FLOAT) {
  6406. if (val.double_val != INFINITY && val.double_val != -INFINITY &&
  6407. (val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) {
  6408. jsondec_err(d, "Float out of range");
  6409. }
  6410. val.float_val = val.double_val;
  6411. }
  6412. return val;
  6413. }
  6414. /* Parse STRING or BYTES value. */
  6415. static upb_msgval jsondec_strfield(jsondec *d, const upb_fielddef *f) {
  6416. upb_msgval val;
  6417. val.str_val = jsondec_string(d);
  6418. if (upb_fielddef_type(f) == UPB_TYPE_BYTES) {
  6419. val.str_val.size = jsondec_base64(d, val.str_val);
  6420. }
  6421. return val;
  6422. }
  6423. static upb_msgval jsondec_enum(jsondec *d, const upb_fielddef *f) {
  6424. switch (jsondec_peek(d)) {
  6425. case JD_STRING: {
  6426. const upb_enumdef *e = upb_fielddef_enumsubdef(f);
  6427. upb_strview str = jsondec_string(d);
  6428. upb_msgval val;
  6429. if (!upb_enumdef_ntoi(e, str.data, str.size, &val.int32_val)) {
  6430. if (d->options & UPB_JSONDEC_IGNOREUNKNOWN) {
  6431. val.int32_val = 0;
  6432. } else {
  6433. jsondec_errf(d, "Unknown enumerator: '" UPB_STRVIEW_FORMAT "'",
  6434. UPB_STRVIEW_ARGS(str));
  6435. }
  6436. }
  6437. return val;
  6438. }
  6439. case JD_NULL: {
  6440. if (jsondec_isnullvalue(f)) {
  6441. upb_msgval val;
  6442. jsondec_null(d);
  6443. val.int32_val = 0;
  6444. return val;
  6445. }
  6446. }
  6447. /* Fallthrough. */
  6448. default:
  6449. return jsondec_int(d, f);
  6450. }
  6451. }
  6452. static upb_msgval jsondec_bool(jsondec *d, const upb_fielddef *f) {
  6453. bool is_map_key = upb_fielddef_number(f) == 1 &&
  6454. upb_msgdef_mapentry(upb_fielddef_containingtype(f));
  6455. upb_msgval val;
  6456. if (is_map_key) {
  6457. upb_strview str = jsondec_string(d);
  6458. if (jsondec_streql(str, "true")) {
  6459. val.bool_val = true;
  6460. } else if (jsondec_streql(str, "false")) {
  6461. val.bool_val = false;
  6462. } else {
  6463. jsondec_err(d, "Invalid boolean map key");
  6464. }
  6465. } else {
  6466. switch (jsondec_peek(d)) {
  6467. case JD_TRUE:
  6468. val.bool_val = true;
  6469. jsondec_true(d);
  6470. break;
  6471. case JD_FALSE:
  6472. val.bool_val = false;
  6473. jsondec_false(d);
  6474. break;
  6475. default:
  6476. jsondec_err(d, "Expected true or false");
  6477. }
  6478. }
  6479. return val;
  6480. }
  6481. /* Composite types (array/message/map) ****************************************/
  6482. static void jsondec_array(jsondec *d, upb_msg *msg, const upb_fielddef *f) {
  6483. upb_array *arr = upb_msg_mutable(msg, f, d->arena).array;
  6484. jsondec_arrstart(d);
  6485. while (jsondec_arrnext(d)) {
  6486. upb_msgval elem = jsondec_value(d, f);
  6487. upb_array_append(arr, elem, d->arena);
  6488. }
  6489. jsondec_arrend(d);
  6490. }
  6491. static void jsondec_map(jsondec *d, upb_msg *msg, const upb_fielddef *f) {
  6492. upb_map *map = upb_msg_mutable(msg, f, d->arena).map;
  6493. const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
  6494. const upb_fielddef *key_f = upb_msgdef_itof(entry, 1);
  6495. const upb_fielddef *val_f = upb_msgdef_itof(entry, 2);
  6496. jsondec_objstart(d);
  6497. while (jsondec_objnext(d)) {
  6498. upb_msgval key, val;
  6499. key = jsondec_value(d, key_f);
  6500. jsondec_entrysep(d);
  6501. val = jsondec_value(d, val_f);
  6502. upb_map_set(map, key, val, d->arena);
  6503. }
  6504. jsondec_objend(d);
  6505. }
  6506. static void jsondec_tomsg(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  6507. if (upb_msgdef_wellknowntype(m) == UPB_WELLKNOWN_UNSPECIFIED) {
  6508. jsondec_object(d, msg, m);
  6509. } else {
  6510. jsondec_wellknown(d, msg, m);
  6511. }
  6512. }
  6513. static upb_msgval jsondec_msg(jsondec *d, const upb_fielddef *f) {
  6514. const upb_msgdef *m = upb_fielddef_msgsubdef(f);
  6515. upb_msg *msg = upb_msg_new(m, d->arena);
  6516. upb_msgval val;
  6517. jsondec_tomsg(d, msg, m);
  6518. val.msg_val = msg;
  6519. return val;
  6520. }
  6521. static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  6522. upb_strview name;
  6523. const upb_fielddef *f;
  6524. const upb_fielddef *preserved;
  6525. name = jsondec_string(d);
  6526. jsondec_entrysep(d);
  6527. f = upb_msgdef_lookupjsonname(m, name.data, name.size);
  6528. if (!f) {
  6529. if ((d->options & UPB_JSONDEC_IGNOREUNKNOWN) == 0) {
  6530. jsondec_errf(d, "No such field: " UPB_STRVIEW_FORMAT,
  6531. UPB_STRVIEW_ARGS(name));
  6532. }
  6533. jsondec_skipval(d);
  6534. return;
  6535. }
  6536. if (upb_fielddef_realcontainingoneof(f) &&
  6537. upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) {
  6538. jsondec_err(d, "More than one field for this oneof.");
  6539. }
  6540. if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) {
  6541. /* JSON "null" indicates a default value, so no need to set anything. */
  6542. jsondec_null(d);
  6543. return;
  6544. }
  6545. preserved = d->debug_field;
  6546. d->debug_field = f;
  6547. if (upb_fielddef_ismap(f)) {
  6548. jsondec_map(d, msg, f);
  6549. } else if (upb_fielddef_isseq(f)) {
  6550. jsondec_array(d, msg, f);
  6551. } else if (upb_fielddef_issubmsg(f)) {
  6552. upb_msg *submsg = upb_msg_mutable(msg, f, d->arena).msg;
  6553. const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
  6554. jsondec_tomsg(d, submsg, subm);
  6555. } else {
  6556. upb_msgval val = jsondec_value(d, f);
  6557. upb_msg_set(msg, f, val, d->arena);
  6558. }
  6559. d->debug_field = preserved;
  6560. }
  6561. static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  6562. jsondec_objstart(d);
  6563. while (jsondec_objnext(d)) {
  6564. jsondec_field(d, msg, m);
  6565. }
  6566. jsondec_objend(d);
  6567. }
  6568. static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f) {
  6569. switch (upb_fielddef_type(f)) {
  6570. case UPB_TYPE_BOOL:
  6571. return jsondec_bool(d, f);
  6572. case UPB_TYPE_FLOAT:
  6573. case UPB_TYPE_DOUBLE:
  6574. return jsondec_double(d, f);
  6575. case UPB_TYPE_UINT32:
  6576. case UPB_TYPE_UINT64:
  6577. return jsondec_uint(d, f);
  6578. case UPB_TYPE_INT32:
  6579. case UPB_TYPE_INT64:
  6580. return jsondec_int(d, f);
  6581. case UPB_TYPE_STRING:
  6582. case UPB_TYPE_BYTES:
  6583. return jsondec_strfield(d, f);
  6584. case UPB_TYPE_ENUM:
  6585. return jsondec_enum(d, f);
  6586. case UPB_TYPE_MESSAGE:
  6587. return jsondec_msg(d, f);
  6588. default:
  6589. UPB_UNREACHABLE();
  6590. }
  6591. }
  6592. /* Well-known types ***********************************************************/
  6593. static int jsondec_tsdigits(jsondec *d, const char **ptr, size_t digits,
  6594. const char *after) {
  6595. uint64_t val;
  6596. const char *p = *ptr;
  6597. const char *end = p + digits;
  6598. size_t after_len = after ? strlen(after) : 0;
  6599. UPB_ASSERT(digits <= 9); /* int can't overflow. */
  6600. if (jsondec_buftouint64(d, p, end, &val) != end ||
  6601. (after_len && memcmp(end, after, after_len) != 0)) {
  6602. jsondec_err(d, "Malformed timestamp");
  6603. }
  6604. UPB_ASSERT(val < INT_MAX);
  6605. *ptr = end + after_len;
  6606. return (int)val;
  6607. }
  6608. static int jsondec_nanos(jsondec *d, const char **ptr, const char *end) {
  6609. uint64_t nanos = 0;
  6610. const char *p = *ptr;
  6611. if (p != end && *p == '.') {
  6612. const char *nano_end = jsondec_buftouint64(d, p + 1, end, &nanos);
  6613. int digits = (int)(nano_end - p - 1);
  6614. int exp_lg10 = 9 - digits;
  6615. if (digits > 9) {
  6616. jsondec_err(d, "Too many digits for partial seconds");
  6617. }
  6618. while (exp_lg10--) nanos *= 10;
  6619. *ptr = nano_end;
  6620. }
  6621. UPB_ASSERT(nanos < INT_MAX);
  6622. return (int)nanos;
  6623. }
  /* jsondec_epochdays(1970, 1, 1) == 1970-01-01 == 0.
   *
   * Converts a proleptic Gregorian calendar date into a day count relative to
   * the Unix epoch. The computation runs on a March-based year so the leap
   * day falls at the end of the year; all intermediate math is deliberately
   * done in uint32_t (wrapping) arithmetic. */
  int jsondec_epochdays(int y, int m, int d) {
    const uint32_t year_base = 4800; /* Before min year, multiple of 400. */
    const uint32_t shifted_month = m - 3; /* March-based month. */
    /* The subtraction wraps around for January and February. */
    const uint32_t wrapped = shifted_month > (uint32_t)m ? 1 : 0;
    uint32_t month_index = shifted_month;
    uint32_t shifted_year = y + year_base;
    uint32_t days;

    if (wrapped) {
      /* Jan/Feb count as months 10/11 of the previous March-based year. */
      month_index += 12;
      shifted_year -= 1;
    }

    days = shifted_year * 365;
    days += shifted_year / 4 - shifted_year / 100 + shifted_year / 400;
    days += (month_index * 62719 + 769) / 2048; /* Days before this month. */
    return days + (d - 1) - 2472632;
  }
  /* Combines a calendar date and a time of day into seconds relative to the
   * Unix epoch, using jsondec_epochdays() for the date part. */
  static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) {
    const int64_t date_seconds = (int64_t)jsondec_epochdays(y, m, d) * 86400;
    return date_seconds + h * 3600 + min * 60 + s;
  }
  6638. static void jsondec_timestamp(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  6639. upb_msgval seconds;
  6640. upb_msgval nanos;
  6641. upb_strview str = jsondec_string(d);
  6642. const char *ptr = str.data;
  6643. const char *end = ptr + str.size;
  6644. if (str.size < 20) goto malformed;
  6645. {
  6646. /* 1972-01-01T01:00:00 */
  6647. int year = jsondec_tsdigits(d, &ptr, 4, "-");
  6648. int mon = jsondec_tsdigits(d, &ptr, 2, "-");
  6649. int day = jsondec_tsdigits(d, &ptr, 2, "T");
  6650. int hour = jsondec_tsdigits(d, &ptr, 2, ":");
  6651. int min = jsondec_tsdigits(d, &ptr, 2, ":");
  6652. int sec = jsondec_tsdigits(d, &ptr, 2, NULL);
  6653. seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec);
  6654. }
  6655. nanos.int32_val = jsondec_nanos(d, &ptr, end);
  6656. {
  6657. /* [+-]08:00 or Z */
  6658. int ofs = 0;
  6659. bool neg = false;
  6660. if (ptr == end) goto malformed;
  6661. switch (*ptr++) {
  6662. case '-':
  6663. neg = true;
  6664. /* fallthrough */
  6665. case '+':
  6666. if ((end - ptr) != 5) goto malformed;
  6667. ofs = jsondec_tsdigits(d, &ptr, 2, ":00");
  6668. ofs *= 60 * 60;
  6669. seconds.int64_val += (neg ? ofs : -ofs);
  6670. break;
  6671. case 'Z':
  6672. if (ptr != end) goto malformed;
  6673. break;
  6674. default:
  6675. goto malformed;
  6676. }
  6677. }
  6678. if (seconds.int64_val < -62135596800) {
  6679. jsondec_err(d, "Timestamp out of range");
  6680. }
  6681. upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena);
  6682. upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena);
  6683. return;
  6684. malformed:
  6685. jsondec_err(d, "Malformed timestamp");
  6686. }
  6687. static void jsondec_duration(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  6688. upb_msgval seconds;
  6689. upb_msgval nanos;
  6690. upb_strview str = jsondec_string(d);
  6691. const char *ptr = str.data;
  6692. const char *end = ptr + str.size;
  6693. const int64_t max = (uint64_t)3652500 * 86400;
  6694. /* "3.000000001s", "3s", etc. */
  6695. ptr = jsondec_buftoint64(d, ptr, end, &seconds.int64_val);
  6696. nanos.int32_val = jsondec_nanos(d, &ptr, end);
  6697. if (end - ptr != 1 || *ptr != 's') {
  6698. jsondec_err(d, "Malformed duration");
  6699. }
  6700. if (seconds.int64_val < -max || seconds.int64_val > max) {
  6701. jsondec_err(d, "Duration out of range");
  6702. }
  6703. if (seconds.int64_val < 0) {
  6704. nanos.int32_val = - nanos.int32_val;
  6705. }
  6706. upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena);
  6707. upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena);
  6708. }
  6709. static void jsondec_listvalue(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  6710. const upb_fielddef *values_f = upb_msgdef_itof(m, 1);
  6711. const upb_msgdef *value_m = upb_fielddef_msgsubdef(values_f);
  6712. upb_array *values = upb_msg_mutable(msg, values_f, d->arena).array;
  6713. jsondec_arrstart(d);
  6714. while (jsondec_arrnext(d)) {
  6715. upb_msg *value_msg = upb_msg_new(value_m, d->arena);
  6716. upb_msgval value;
  6717. value.msg_val = value_msg;
  6718. upb_array_append(values, value, d->arena);
  6719. jsondec_wellknownvalue(d, value_msg, value_m);
  6720. }
  6721. jsondec_arrend(d);
  6722. }
  6723. static void jsondec_struct(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  6724. const upb_fielddef *fields_f = upb_msgdef_itof(m, 1);
  6725. const upb_msgdef *entry_m = upb_fielddef_msgsubdef(fields_f);
  6726. const upb_fielddef *value_f = upb_msgdef_itof(entry_m, 2);
  6727. const upb_msgdef *value_m = upb_fielddef_msgsubdef(value_f);
  6728. upb_map *fields = upb_msg_mutable(msg, fields_f, d->arena).map;
  6729. jsondec_objstart(d);
  6730. while (jsondec_objnext(d)) {
  6731. upb_msgval key, value;
  6732. upb_msg *value_msg = upb_msg_new(value_m, d->arena);
  6733. key.str_val = jsondec_string(d);
  6734. value.msg_val = value_msg;
  6735. upb_map_set(fields, key, value, d->arena);
  6736. jsondec_entrysep(d);
  6737. jsondec_wellknownvalue(d, value_msg, value_m);
  6738. }
  6739. jsondec_objend(d);
  6740. }
  6741. static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg,
  6742. const upb_msgdef *m) {
  6743. upb_msgval val;
  6744. const upb_fielddef *f;
  6745. upb_msg *submsg;
  6746. switch (jsondec_peek(d)) {
  6747. case JD_NUMBER:
  6748. /* double number_value = 2; */
  6749. f = upb_msgdef_itof(m, 2);
  6750. val.double_val = jsondec_number(d);
  6751. break;
  6752. case JD_STRING:
  6753. /* string string_value = 3; */
  6754. f = upb_msgdef_itof(m, 3);
  6755. val.str_val = jsondec_string(d);
  6756. break;
  6757. case JD_FALSE:
  6758. /* bool bool_value = 4; */
  6759. f = upb_msgdef_itof(m, 4);
  6760. val.bool_val = false;
  6761. jsondec_false(d);
  6762. break;
  6763. case JD_TRUE:
  6764. /* bool bool_value = 4; */
  6765. f = upb_msgdef_itof(m, 4);
  6766. val.bool_val = true;
  6767. jsondec_true(d);
  6768. break;
  6769. case JD_NULL:
  6770. /* NullValue null_value = 1; */
  6771. f = upb_msgdef_itof(m, 1);
  6772. val.int32_val = 0;
  6773. jsondec_null(d);
  6774. break;
  6775. /* Note: these cases return, because upb_msg_mutable() is enough. */
  6776. case JD_OBJECT:
  6777. /* Struct struct_value = 5; */
  6778. f = upb_msgdef_itof(m, 5);
  6779. submsg = upb_msg_mutable(msg, f, d->arena).msg;
  6780. jsondec_struct(d, submsg, upb_fielddef_msgsubdef(f));
  6781. return;
  6782. case JD_ARRAY:
  6783. /* ListValue list_value = 6; */
  6784. f = upb_msgdef_itof(m, 6);
  6785. submsg = upb_msg_mutable(msg, f, d->arena).msg;
  6786. jsondec_listvalue(d, submsg, upb_fielddef_msgsubdef(f));
  6787. return;
  6788. default:
  6789. UPB_UNREACHABLE();
  6790. }
  6791. upb_msg_set(msg, f, val, d->arena);
  6792. }
  6793. static upb_strview jsondec_mask(jsondec *d, const char *buf, const char *end) {
  6794. /* FieldMask fields grow due to inserted '_' characters, so we can't do the
  6795. * transform in place. */
  6796. const char *ptr = buf;
  6797. upb_strview ret;
  6798. char *out;
  6799. ret.size = end - ptr;
  6800. while (ptr < end) {
  6801. ret.size += (*ptr >= 'A' && *ptr <= 'Z');
  6802. ptr++;
  6803. }
  6804. out = upb_arena_malloc(d->arena, ret.size);
  6805. ptr = buf;
  6806. ret.data = out;
  6807. while (ptr < end) {
  6808. char ch = *ptr++;
  6809. if (ch >= 'A' && ch <= 'Z') {
  6810. *out++ = '_';
  6811. *out++ = ch + 32;
  6812. } else if (ch == '_') {
  6813. jsondec_err(d, "field mask may not contain '_'");
  6814. } else {
  6815. *out++ = ch;
  6816. }
  6817. }
  6818. return ret;
  6819. }
  6820. static void jsondec_fieldmask(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  6821. /* repeated string paths = 1; */
  6822. const upb_fielddef *paths_f = upb_msgdef_itof(m, 1);
  6823. upb_array *arr = upb_msg_mutable(msg, paths_f, d->arena).array;
  6824. upb_strview str = jsondec_string(d);
  6825. const char *ptr = str.data;
  6826. const char *end = ptr + str.size;
  6827. upb_msgval val;
  6828. while (ptr < end) {
  6829. const char *elem_end = memchr(ptr, ',', end - ptr);
  6830. if (elem_end) {
  6831. val.str_val = jsondec_mask(d, ptr, elem_end);
  6832. ptr = elem_end + 1;
  6833. } else {
  6834. val.str_val = jsondec_mask(d, ptr, end);
  6835. ptr = end;
  6836. }
  6837. upb_array_append(arr, val, d->arena);
  6838. }
  6839. }
  6840. static void jsondec_anyfield(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  6841. if (upb_msgdef_wellknowntype(m) == UPB_WELLKNOWN_UNSPECIFIED) {
  6842. /* For regular types: {"@type": "[user type]", "f1": <V1>, "f2": <V2>}
  6843. * where f1, f2, etc. are the normal fields of this type. */
  6844. jsondec_field(d, msg, m);
  6845. } else {
  6846. /* For well-known types: {"@type": "[well-known type]", "value": <X>}
  6847. * where <X> is whatever encoding the WKT normally uses. */
  6848. upb_strview str = jsondec_string(d);
  6849. jsondec_entrysep(d);
  6850. if (!jsondec_streql(str, "value")) {
  6851. jsondec_err(d, "Key for well-known type must be 'value'");
  6852. }
  6853. jsondec_wellknown(d, msg, m);
  6854. }
  6855. }
  6856. static const upb_msgdef *jsondec_typeurl(jsondec *d, upb_msg *msg,
  6857. const upb_msgdef *m) {
  6858. const upb_fielddef *type_url_f = upb_msgdef_itof(m, 1);
  6859. const upb_msgdef *type_m;
  6860. upb_strview type_url = jsondec_string(d);
  6861. const char *end = type_url.data + type_url.size;
  6862. const char *ptr = end;
  6863. upb_msgval val;
  6864. val.str_val = type_url;
  6865. upb_msg_set(msg, type_url_f, val, d->arena);
  6866. /* Find message name after the last '/' */
  6867. while (ptr > type_url.data && *--ptr != '/') {}
  6868. if (ptr == type_url.data || ptr == end) {
  6869. jsondec_err(d, "Type url must have at least one '/' and non-empty host");
  6870. }
  6871. ptr++;
  6872. type_m = upb_symtab_lookupmsg2(d->any_pool, ptr, end - ptr);
  6873. if (!type_m) {
  6874. jsondec_err(d, "Type was not found");
  6875. }
  6876. return type_m;
  6877. }
  6878. static void jsondec_any(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  6879. /* string type_url = 1;
  6880. * bytes value = 2; */
  6881. const upb_fielddef *value_f = upb_msgdef_itof(m, 2);
  6882. upb_msg *any_msg;
  6883. const upb_msgdef *any_m = NULL;
  6884. const char *pre_type_data = NULL;
  6885. const char *pre_type_end = NULL;
  6886. upb_msgval encoded;
  6887. jsondec_objstart(d);
  6888. /* Scan looking for "@type", which is not necessarily first. */
  6889. while (!any_m && jsondec_objnext(d)) {
  6890. const char *start = d->ptr;
  6891. upb_strview name = jsondec_string(d);
  6892. jsondec_entrysep(d);
  6893. if (jsondec_streql(name, "@type")) {
  6894. any_m = jsondec_typeurl(d, msg, m);
  6895. if (pre_type_data) {
  6896. pre_type_end = start;
  6897. while (*pre_type_end != ',') pre_type_end--;
  6898. }
  6899. } else {
  6900. if (!pre_type_data) pre_type_data = start;
  6901. jsondec_skipval(d);
  6902. }
  6903. }
  6904. if (!any_m) {
  6905. jsondec_err(d, "Any object didn't contain a '@type' field");
  6906. }
  6907. any_msg = upb_msg_new(any_m, d->arena);
  6908. if (pre_type_data) {
  6909. size_t len = pre_type_end - pre_type_data + 1;
  6910. char *tmp = upb_arena_malloc(d->arena, len);
  6911. const char *saved_ptr = d->ptr;
  6912. const char *saved_end = d->end;
  6913. memcpy(tmp, pre_type_data, len - 1);
  6914. tmp[len - 1] = '}';
  6915. d->ptr = tmp;
  6916. d->end = tmp + len;
  6917. d->is_first = true;
  6918. while (jsondec_objnext(d)) {
  6919. jsondec_anyfield(d, any_msg, any_m);
  6920. }
  6921. d->ptr = saved_ptr;
  6922. d->end = saved_end;
  6923. }
  6924. while (jsondec_objnext(d)) {
  6925. jsondec_anyfield(d, any_msg, any_m);
  6926. }
  6927. jsondec_objend(d);
  6928. encoded.str_val.data = upb_encode(any_msg, upb_msgdef_layout(any_m), d->arena,
  6929. &encoded.str_val.size);
  6930. upb_msg_set(msg, value_f, encoded, d->arena);
  6931. }
  6932. static void jsondec_wrapper(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  6933. const upb_fielddef *value_f = upb_msgdef_itof(m, 1);
  6934. upb_msgval val = jsondec_value(d, value_f);
  6935. upb_msg_set(msg, value_f, val, d->arena);
  6936. }
  6937. static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  6938. switch (upb_msgdef_wellknowntype(m)) {
  6939. case UPB_WELLKNOWN_ANY:
  6940. jsondec_any(d, msg, m);
  6941. break;
  6942. case UPB_WELLKNOWN_FIELDMASK:
  6943. jsondec_fieldmask(d, msg, m);
  6944. break;
  6945. case UPB_WELLKNOWN_DURATION:
  6946. jsondec_duration(d, msg, m);
  6947. break;
  6948. case UPB_WELLKNOWN_TIMESTAMP:
  6949. jsondec_timestamp(d, msg, m);
  6950. break;
  6951. case UPB_WELLKNOWN_VALUE:
  6952. jsondec_wellknownvalue(d, msg, m);
  6953. break;
  6954. case UPB_WELLKNOWN_LISTVALUE:
  6955. jsondec_listvalue(d, msg, m);
  6956. break;
  6957. case UPB_WELLKNOWN_STRUCT:
  6958. jsondec_struct(d, msg, m);
  6959. break;
  6960. case UPB_WELLKNOWN_DOUBLEVALUE:
  6961. case UPB_WELLKNOWN_FLOATVALUE:
  6962. case UPB_WELLKNOWN_INT64VALUE:
  6963. case UPB_WELLKNOWN_UINT64VALUE:
  6964. case UPB_WELLKNOWN_INT32VALUE:
  6965. case UPB_WELLKNOWN_UINT32VALUE:
  6966. case UPB_WELLKNOWN_STRINGVALUE:
  6967. case UPB_WELLKNOWN_BYTESVALUE:
  6968. case UPB_WELLKNOWN_BOOLVALUE:
  6969. jsondec_wrapper(d, msg, m);
  6970. break;
  6971. default:
  6972. UPB_UNREACHABLE();
  6973. }
  6974. }
  6975. bool upb_json_decode(const char *buf, size_t size, upb_msg *msg,
  6976. const upb_msgdef *m, const upb_symtab *any_pool,
  6977. int options, upb_arena *arena, upb_status *status) {
  6978. jsondec d;
  6979. d.ptr = buf;
  6980. d.end = buf + size;
  6981. d.arena = arena;
  6982. d.any_pool = any_pool;
  6983. d.status = status;
  6984. d.options = options;
  6985. d.depth = 64;
  6986. d.line = 1;
  6987. d.line_begin = d.ptr;
  6988. d.debug_field = NULL;
  6989. d.is_first = false;
  6990. if (UPB_SETJMP(d.err)) return false;
  6991. jsondec_tomsg(&d, msg, m);
  6992. return true;
  6993. }
  6994. #include <ctype.h>
  6995. #include <float.h>
  6996. #include <inttypes.h>
  6997. #include <math.h>
  6998. #include <setjmp.h>
  6999. #include <stdarg.h>
  7000. #include <stdio.h>
  7001. #include <string.h>
  7002. /* Must be last. */
  7003. typedef struct {
  7004. char *buf, *ptr, *end;
  7005. size_t overflow;
  7006. int indent_depth;
  7007. int options;
  7008. const upb_symtab *ext_pool;
  7009. jmp_buf err;
  7010. upb_status *status;
  7011. upb_arena *arena;
  7012. } jsonenc;
  7013. static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m);
  7014. static void jsonenc_scalar(jsonenc *e, upb_msgval val, const upb_fielddef *f);
  7015. static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg,
  7016. const upb_msgdef *m);
  7017. static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg,
  7018. const upb_msgdef *m);
  7019. static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m);
  7020. UPB_NORETURN static void jsonenc_err(jsonenc *e, const char *msg) {
  7021. upb_status_seterrmsg(e->status, msg);
  7022. longjmp(e->err, 1);
  7023. }
  7024. UPB_PRINTF(2, 3)
  7025. UPB_NORETURN static void jsonenc_errf(jsonenc *e, const char *fmt, ...) {
  7026. va_list argp;
  7027. va_start(argp, fmt);
  7028. upb_status_vseterrf(e->status, fmt, argp);
  7029. va_end(argp);
  7030. longjmp(e->err, 1);
  7031. }
  7032. static upb_arena *jsonenc_arena(jsonenc *e) {
  7033. /* Create lazily, since it's only needed for Any */
  7034. if (!e->arena) {
  7035. e->arena = upb_arena_new();
  7036. }
  7037. return e->arena;
  7038. }
  7039. static void jsonenc_putbytes(jsonenc *e, const void *data, size_t len) {
  7040. size_t have = e->end - e->ptr;
  7041. if (UPB_LIKELY(have >= len)) {
  7042. memcpy(e->ptr, data, len);
  7043. e->ptr += len;
  7044. } else {
  7045. if (have) memcpy(e->ptr, data, have);
  7046. e->ptr += have;
  7047. e->overflow += (len - have);
  7048. }
  7049. }
  7050. static void jsonenc_putstr(jsonenc *e, const char *str) {
  7051. jsonenc_putbytes(e, str, strlen(str));
  7052. }
  7053. UPB_PRINTF(2, 3)
  7054. static void jsonenc_printf(jsonenc *e, const char *fmt, ...) {
  7055. size_t n;
  7056. size_t have = e->end - e->ptr;
  7057. va_list args;
  7058. va_start(args, fmt);
  7059. n = vsnprintf(e->ptr, have, fmt, args);
  7060. va_end(args);
  7061. if (UPB_LIKELY(have > n)) {
  7062. e->ptr += n;
  7063. } else {
  7064. e->ptr += have;
  7065. e->overflow += (n - have);
  7066. }
  7067. }
  7068. static void jsonenc_nanos(jsonenc *e, int32_t nanos) {
  7069. int digits = 9;
  7070. if (nanos == 0) return;
  7071. if (nanos < 0 || nanos >= 1000000000) {
  7072. jsonenc_err(e, "error formatting timestamp as JSON: invalid nanos");
  7073. }
  7074. while (nanos % 1000 == 0) {
  7075. nanos /= 1000;
  7076. digits -= 3;
  7077. }
  7078. jsonenc_printf(e, ".%.*" PRId32, digits, nanos);
  7079. }
  7080. static void jsonenc_timestamp(jsonenc *e, const upb_msg *msg,
  7081. const upb_msgdef *m) {
  7082. const upb_fielddef *seconds_f = upb_msgdef_itof(m, 1);
  7083. const upb_fielddef *nanos_f = upb_msgdef_itof(m, 2);
  7084. int64_t seconds = upb_msg_get(msg, seconds_f).int64_val;
  7085. int32_t nanos = upb_msg_get(msg, nanos_f).int32_val;
  7086. int L, N, I, J, K, hour, min, sec;
  7087. if (seconds < -62135596800) {
  7088. jsonenc_err(e,
  7089. "error formatting timestamp as JSON: minimum acceptable value "
  7090. "is 0001-01-01T00:00:00Z");
  7091. } else if (seconds > 253402300799) {
  7092. jsonenc_err(e,
  7093. "error formatting timestamp as JSON: maximum acceptable value "
  7094. "is 9999-12-31T23:59:59Z");
  7095. }
  7096. /* Julian Day -> Y/M/D, Algorithm from:
  7097. * Fliegel, H. F., and Van Flandern, T. C., "A Machine Algorithm for
  7098. * Processing Calendar Dates," Communications of the Association of
  7099. * Computing Machines, vol. 11 (1968), p. 657. */
  7100. L = (int)(seconds / 86400) + 68569 + 2440588;
  7101. N = 4 * L / 146097;
  7102. L = L - (146097 * N + 3) / 4;
  7103. I = 4000 * (L + 1) / 1461001;
  7104. L = L - 1461 * I / 4 + 31;
  7105. J = 80 * L / 2447;
  7106. K = L - 2447 * J / 80;
  7107. L = J / 11;
  7108. J = J + 2 - 12 * L;
  7109. I = 100 * (N - 49) + I + L;
  7110. sec = seconds % 60;
  7111. min = (seconds / 60) % 60;
  7112. hour = (seconds / 3600) % 24;
  7113. jsonenc_printf(e, "\"%04d-%02d-%02dT%02d:%02d:%02d", I, J, K, hour, min, sec);
  7114. jsonenc_nanos(e, nanos);
  7115. jsonenc_putstr(e, "Z\"");
  7116. }
  7117. static void jsonenc_duration(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) {
  7118. const upb_fielddef *seconds_f = upb_msgdef_itof(m, 1);
  7119. const upb_fielddef *nanos_f = upb_msgdef_itof(m, 2);
  7120. int64_t seconds = upb_msg_get(msg, seconds_f).int64_val;
  7121. int32_t nanos = upb_msg_get(msg, nanos_f).int32_val;
  7122. if (seconds > 315576000000 || seconds < -315576000000 ||
  7123. (seconds < 0) != (nanos < 0)) {
  7124. jsonenc_err(e, "bad duration");
  7125. }
  7126. if (nanos < 0) {
  7127. nanos = -nanos;
  7128. }
  7129. jsonenc_printf(e, "\"%" PRId64, seconds);
  7130. jsonenc_nanos(e, nanos);
  7131. jsonenc_putstr(e, "s\"");
  7132. }
  7133. static void jsonenc_enum(int32_t val, const upb_fielddef *f, jsonenc *e) {
  7134. const upb_enumdef *e_def = upb_fielddef_enumsubdef(f);
  7135. if (strcmp(upb_enumdef_fullname(e_def), "google.protobuf.NullValue") == 0) {
  7136. jsonenc_putstr(e, "null");
  7137. } else {
  7138. const char *name = upb_enumdef_iton(e_def, val);
  7139. if (name) {
  7140. jsonenc_printf(e, "\"%s\"", name);
  7141. } else {
  7142. jsonenc_printf(e, "%" PRId32, val);
  7143. }
  7144. }
  7145. }
  7146. static void jsonenc_bytes(jsonenc *e, upb_strview str) {
  7147. /* This is the regular base64, not the "web-safe" version. */
  7148. static const char base64[] =
  7149. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  7150. const unsigned char *ptr = (unsigned char*)str.data;
  7151. const unsigned char *end = ptr + str.size;
  7152. char buf[4];
  7153. jsonenc_putstr(e, "\"");
  7154. while (end - ptr >= 3) {
  7155. buf[0] = base64[ptr[0] >> 2];
  7156. buf[1] = base64[((ptr[0] & 0x3) << 4) | (ptr[1] >> 4)];
  7157. buf[2] = base64[((ptr[1] & 0xf) << 2) | (ptr[2] >> 6)];
  7158. buf[3] = base64[ptr[2] & 0x3f];
  7159. jsonenc_putbytes(e, buf, 4);
  7160. ptr += 3;
  7161. }
  7162. switch (end - ptr) {
  7163. case 2:
  7164. buf[0] = base64[ptr[0] >> 2];
  7165. buf[1] = base64[((ptr[0] & 0x3) << 4) | (ptr[1] >> 4)];
  7166. buf[2] = base64[(ptr[1] & 0xf) << 2];
  7167. buf[3] = '=';
  7168. jsonenc_putbytes(e, buf, 4);
  7169. break;
  7170. case 1:
  7171. buf[0] = base64[ptr[0] >> 2];
  7172. buf[1] = base64[((ptr[0] & 0x3) << 4)];
  7173. buf[2] = '=';
  7174. buf[3] = '=';
  7175. jsonenc_putbytes(e, buf, 4);
  7176. break;
  7177. }
  7178. jsonenc_putstr(e, "\"");
  7179. }
  7180. static void jsonenc_stringbody(jsonenc *e, upb_strview str) {
  7181. const char *ptr = str.data;
  7182. const char *end = ptr + str.size;
  7183. while (ptr < end) {
  7184. switch (*ptr) {
  7185. case '\n':
  7186. jsonenc_putstr(e, "\\n");
  7187. break;
  7188. case '\r':
  7189. jsonenc_putstr(e, "\\r");
  7190. break;
  7191. case '\t':
  7192. jsonenc_putstr(e, "\\t");
  7193. break;
  7194. case '\"':
  7195. jsonenc_putstr(e, "\\\"");
  7196. break;
  7197. case '\f':
  7198. jsonenc_putstr(e, "\\f");
  7199. break;
  7200. case '\b':
  7201. jsonenc_putstr(e, "\\b");
  7202. break;
  7203. case '\\':
  7204. jsonenc_putstr(e, "\\\\");
  7205. break;
  7206. default:
  7207. if ((uint8_t)*ptr < 0x20) {
  7208. jsonenc_printf(e, "\\u%04x", (int)(uint8_t)*ptr);
  7209. } else {
  7210. /* This could be a non-ASCII byte. We rely on the string being valid
  7211. * UTF-8. */
  7212. jsonenc_putbytes(e, ptr, 1);
  7213. }
  7214. break;
  7215. }
  7216. ptr++;
  7217. }
  7218. }
  7219. static void jsonenc_string(jsonenc *e, upb_strview str) {
  7220. jsonenc_putstr(e, "\"");
  7221. jsonenc_stringbody(e, str);
  7222. jsonenc_putstr(e, "\"");
  7223. }
  7224. static void jsonenc_double(jsonenc *e, const char *fmt, double val) {
  7225. if (val == INFINITY) {
  7226. jsonenc_putstr(e, "\"Infinity\"");
  7227. } else if (val == -INFINITY) {
  7228. jsonenc_putstr(e, "\"-Infinity\"");
  7229. } else if (val != val) {
  7230. jsonenc_putstr(e, "\"NaN\"");
  7231. } else {
  7232. jsonenc_printf(e, fmt, val);
  7233. }
  7234. }
  7235. static void jsonenc_wrapper(jsonenc *e, const upb_msg *msg,
  7236. const upb_msgdef *m) {
  7237. const upb_fielddef *val_f = upb_msgdef_itof(m, 1);
  7238. upb_msgval val = upb_msg_get(msg, val_f);
  7239. jsonenc_scalar(e, val, val_f);
  7240. }
  7241. static const upb_msgdef *jsonenc_getanymsg(jsonenc *e, upb_strview type_url) {
  7242. /* Find last '/', if any. */
  7243. const char *end = type_url.data + type_url.size;
  7244. const char *ptr = end;
  7245. const upb_msgdef *ret;
  7246. if (!e->ext_pool) {
  7247. jsonenc_err(e, "Tried to encode Any, but no symtab was provided");
  7248. }
  7249. if (type_url.size == 0) goto badurl;
  7250. while (true) {
  7251. if (--ptr == type_url.data) {
  7252. /* Type URL must contain at least one '/', with host before. */
  7253. goto badurl;
  7254. }
  7255. if (*ptr == '/') {
  7256. ptr++;
  7257. break;
  7258. }
  7259. }
  7260. ret = upb_symtab_lookupmsg2(e->ext_pool, ptr, end - ptr);
  7261. if (!ret) {
  7262. jsonenc_errf(e, "Couldn't find Any type: %.*s", (int)(end - ptr), ptr);
  7263. }
  7264. return ret;
  7265. badurl:
  7266. jsonenc_errf(
  7267. e, "Bad type URL: " UPB_STRVIEW_FORMAT, UPB_STRVIEW_ARGS(type_url));
  7268. }
  7269. static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) {
  7270. const upb_fielddef *type_url_f = upb_msgdef_itof(m, 1);
  7271. const upb_fielddef *value_f = upb_msgdef_itof(m, 2);
  7272. upb_strview type_url = upb_msg_get(msg, type_url_f).str_val;
  7273. upb_strview value = upb_msg_get(msg, value_f).str_val;
  7274. const upb_msgdef *any_m = jsonenc_getanymsg(e, type_url);
  7275. const upb_msglayout *any_layout = upb_msgdef_layout(any_m);
  7276. upb_arena *arena = jsonenc_arena(e);
  7277. upb_msg *any = upb_msg_new(any_m, arena);
  7278. if (!upb_decode(value.data, value.size, any, any_layout, arena)) {
  7279. jsonenc_err(e, "Error decoding message in Any");
  7280. }
  7281. jsonenc_putstr(e, "{\"@type\":");
  7282. jsonenc_string(e, type_url);
  7283. jsonenc_putstr(e, ",");
  7284. if (upb_msgdef_wellknowntype(any_m) == UPB_WELLKNOWN_UNSPECIFIED) {
  7285. /* Regular messages: {"@type": "...","foo": 1, "bar": 2} */
  7286. jsonenc_msgfields(e, any, any_m);
  7287. } else {
  7288. /* Well-known type: {"@type": "...","value": <well-known encoding>} */
  7289. jsonenc_putstr(e, "\"value\":");
  7290. jsonenc_msgfield(e, any, any_m);
  7291. }
  7292. jsonenc_putstr(e, "}");
  7293. }
  7294. static void jsonenc_putsep(jsonenc *e, const char *str, bool *first) {
  7295. if (*first) {
  7296. *first = false;
  7297. } else {
  7298. jsonenc_putstr(e, str);
  7299. }
  7300. }
  7301. static void jsonenc_fieldpath(jsonenc *e, upb_strview path) {
  7302. const char *ptr = path.data;
  7303. const char *end = ptr + path.size;
  7304. while (ptr < end) {
  7305. char ch = *ptr;
  7306. if (ch >= 'A' && ch <= 'Z') {
  7307. jsonenc_err(e, "Field mask element may not have upper-case letter.");
  7308. } else if (ch == '_') {
  7309. if (ptr == end - 1 || *(ptr + 1) < 'a' || *(ptr + 1) > 'z') {
  7310. jsonenc_err(e, "Underscore must be followed by a lowercase letter.");
  7311. }
  7312. ch = *++ptr - 32;
  7313. }
  7314. jsonenc_putbytes(e, &ch, 1);
  7315. ptr++;
  7316. }
  7317. }
  7318. static void jsonenc_fieldmask(jsonenc *e, const upb_msg *msg,
  7319. const upb_msgdef *m) {
  7320. const upb_fielddef *paths_f = upb_msgdef_itof(m, 1);
  7321. const upb_array *paths = upb_msg_get(msg, paths_f).array_val;
  7322. bool first = true;
  7323. size_t i, n = 0;
  7324. if (paths) n = upb_array_size(paths);
  7325. jsonenc_putstr(e, "\"");
  7326. for (i = 0; i < n; i++) {
  7327. jsonenc_putsep(e, ",", &first);
  7328. jsonenc_fieldpath(e, upb_array_get(paths, i).str_val);
  7329. }
  7330. jsonenc_putstr(e, "\"");
  7331. }
  7332. static void jsonenc_struct(jsonenc *e, const upb_msg *msg,
  7333. const upb_msgdef *m) {
  7334. const upb_fielddef *fields_f = upb_msgdef_itof(m, 1);
  7335. const upb_map *fields = upb_msg_get(msg, fields_f).map_val;
  7336. const upb_msgdef *entry_m = upb_fielddef_msgsubdef(fields_f);
  7337. const upb_fielddef *value_f = upb_msgdef_itof(entry_m, 2);
  7338. size_t iter = UPB_MAP_BEGIN;
  7339. bool first = true;
  7340. jsonenc_putstr(e, "{");
  7341. if (fields) {
  7342. while (upb_mapiter_next(fields, &iter)) {
  7343. upb_msgval key = upb_mapiter_key(fields, iter);
  7344. upb_msgval val = upb_mapiter_value(fields, iter);
  7345. jsonenc_putsep(e, ",", &first);
  7346. jsonenc_string(e, key.str_val);
  7347. jsonenc_putstr(e, ":");
  7348. jsonenc_value(e, val.msg_val, upb_fielddef_msgsubdef(value_f));
  7349. }
  7350. }
  7351. jsonenc_putstr(e, "}");
  7352. }
  7353. static void jsonenc_listvalue(jsonenc *e, const upb_msg *msg,
  7354. const upb_msgdef *m) {
  7355. const upb_fielddef *values_f = upb_msgdef_itof(m, 1);
  7356. const upb_msgdef *values_m = upb_fielddef_msgsubdef(values_f);
  7357. const upb_array *values = upb_msg_get(msg, values_f).array_val;
  7358. size_t i;
  7359. bool first = true;
  7360. jsonenc_putstr(e, "[");
  7361. if (values) {
  7362. const size_t size = upb_array_size(values);
  7363. for (i = 0; i < size; i++) {
  7364. upb_msgval elem = upb_array_get(values, i);
  7365. jsonenc_putsep(e, ",", &first);
  7366. jsonenc_value(e, elem.msg_val, values_m);
  7367. }
  7368. }
  7369. jsonenc_putstr(e, "]");
  7370. }
  7371. static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) {
  7372. /* TODO(haberman): do we want a reflection method to get oneof case? */
  7373. size_t iter = UPB_MSG_BEGIN;
  7374. const upb_fielddef *f;
  7375. upb_msgval val;
  7376. if (!upb_msg_next(msg, m, NULL, &f, &val, &iter)) {
  7377. jsonenc_err(e, "No value set in Value proto");
  7378. }
  7379. switch (upb_fielddef_number(f)) {
  7380. case 1:
  7381. jsonenc_putstr(e, "null");
  7382. break;
  7383. case 2:
  7384. jsonenc_double(e, "%.17g", val.double_val);
  7385. break;
  7386. case 3:
  7387. jsonenc_string(e, val.str_val);
  7388. break;
  7389. case 4:
  7390. jsonenc_putstr(e, val.bool_val ? "true" : "false");
  7391. break;
  7392. case 5:
  7393. jsonenc_struct(e, val.msg_val, upb_fielddef_msgsubdef(f));
  7394. break;
  7395. case 6:
  7396. jsonenc_listvalue(e, val.msg_val, upb_fielddef_msgsubdef(f));
  7397. break;
  7398. }
  7399. }
  7400. static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg,
  7401. const upb_msgdef *m) {
  7402. switch (upb_msgdef_wellknowntype(m)) {
  7403. case UPB_WELLKNOWN_UNSPECIFIED:
  7404. jsonenc_msg(e, msg, m);
  7405. break;
  7406. case UPB_WELLKNOWN_ANY:
  7407. jsonenc_any(e, msg, m);
  7408. break;
  7409. case UPB_WELLKNOWN_FIELDMASK:
  7410. jsonenc_fieldmask(e, msg, m);
  7411. break;
  7412. case UPB_WELLKNOWN_DURATION:
  7413. jsonenc_duration(e, msg, m);
  7414. break;
  7415. case UPB_WELLKNOWN_TIMESTAMP:
  7416. jsonenc_timestamp(e, msg, m);
  7417. break;
  7418. case UPB_WELLKNOWN_DOUBLEVALUE:
  7419. case UPB_WELLKNOWN_FLOATVALUE:
  7420. case UPB_WELLKNOWN_INT64VALUE:
  7421. case UPB_WELLKNOWN_UINT64VALUE:
  7422. case UPB_WELLKNOWN_INT32VALUE:
  7423. case UPB_WELLKNOWN_UINT32VALUE:
  7424. case UPB_WELLKNOWN_STRINGVALUE:
  7425. case UPB_WELLKNOWN_BYTESVALUE:
  7426. case UPB_WELLKNOWN_BOOLVALUE:
  7427. jsonenc_wrapper(e, msg, m);
  7428. break;
  7429. case UPB_WELLKNOWN_VALUE:
  7430. jsonenc_value(e, msg, m);
  7431. break;
  7432. case UPB_WELLKNOWN_LISTVALUE:
  7433. jsonenc_listvalue(e, msg, m);
  7434. break;
  7435. case UPB_WELLKNOWN_STRUCT:
  7436. jsonenc_struct(e, msg, m);
  7437. break;
  7438. }
  7439. }
  7440. static void jsonenc_scalar(jsonenc *e, upb_msgval val, const upb_fielddef *f) {
  7441. switch (upb_fielddef_type(f)) {
  7442. case UPB_TYPE_BOOL:
  7443. jsonenc_putstr(e, val.bool_val ? "true" : "false");
  7444. break;
  7445. case UPB_TYPE_FLOAT:
  7446. jsonenc_double(e, "%.9g", val.float_val);
  7447. break;
  7448. case UPB_TYPE_DOUBLE:
  7449. jsonenc_double(e, "%.17g", val.double_val);
  7450. break;
  7451. case UPB_TYPE_INT32:
  7452. jsonenc_printf(e, "%" PRId32, val.int32_val);
  7453. break;
  7454. case UPB_TYPE_UINT32:
  7455. jsonenc_printf(e, "%" PRIu32, val.uint32_val);
  7456. break;
  7457. case UPB_TYPE_INT64:
  7458. jsonenc_printf(e, "\"%" PRId64 "\"", val.int64_val);
  7459. break;
  7460. case UPB_TYPE_UINT64:
  7461. jsonenc_printf(e, "\"%" PRIu64 "\"", val.uint64_val);
  7462. break;
  7463. case UPB_TYPE_STRING:
  7464. jsonenc_string(e, val.str_val);
  7465. break;
  7466. case UPB_TYPE_BYTES:
  7467. jsonenc_bytes(e, val.str_val);
  7468. break;
  7469. case UPB_TYPE_ENUM:
  7470. jsonenc_enum(val.int32_val, f, e);
  7471. break;
  7472. case UPB_TYPE_MESSAGE:
  7473. jsonenc_msgfield(e, val.msg_val, upb_fielddef_msgsubdef(f));
  7474. break;
  7475. }
  7476. }
  7477. static void jsonenc_mapkey(jsonenc *e, upb_msgval val, const upb_fielddef *f) {
  7478. jsonenc_putstr(e, "\"");
  7479. switch (upb_fielddef_type(f)) {
  7480. case UPB_TYPE_BOOL:
  7481. jsonenc_putstr(e, val.bool_val ? "true" : "false");
  7482. break;
  7483. case UPB_TYPE_INT32:
  7484. jsonenc_printf(e, "%" PRId32, val.int32_val);
  7485. break;
  7486. case UPB_TYPE_UINT32:
  7487. jsonenc_printf(e, "%" PRIu32, val.uint32_val);
  7488. break;
  7489. case UPB_TYPE_INT64:
  7490. jsonenc_printf(e, "%" PRId64, val.int64_val);
  7491. break;
  7492. case UPB_TYPE_UINT64:
  7493. jsonenc_printf(e, "%" PRIu64, val.uint64_val);
  7494. break;
  7495. case UPB_TYPE_STRING:
  7496. jsonenc_stringbody(e, val.str_val);
  7497. break;
  7498. default:
  7499. UPB_UNREACHABLE();
  7500. }
  7501. jsonenc_putstr(e, "\":");
  7502. }
  7503. static void jsonenc_array(jsonenc *e, const upb_array *arr,
  7504. const upb_fielddef *f) {
  7505. size_t i;
  7506. size_t size = arr ? upb_array_size(arr) : 0;
  7507. bool first = true;
  7508. jsonenc_putstr(e, "[");
  7509. for (i = 0; i < size; i++) {
  7510. jsonenc_putsep(e, ",", &first);
  7511. jsonenc_scalar(e, upb_array_get(arr, i), f);
  7512. }
  7513. jsonenc_putstr(e, "]");
  7514. }
  7515. static void jsonenc_map(jsonenc *e, const upb_map *map, const upb_fielddef *f) {
  7516. const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
  7517. const upb_fielddef *key_f = upb_msgdef_itof(entry, 1);
  7518. const upb_fielddef *val_f = upb_msgdef_itof(entry, 2);
  7519. size_t iter = UPB_MAP_BEGIN;
  7520. bool first = true;
  7521. jsonenc_putstr(e, "{");
  7522. if (map) {
  7523. while (upb_mapiter_next(map, &iter)) {
  7524. jsonenc_putsep(e, ",", &first);
  7525. jsonenc_mapkey(e, upb_mapiter_key(map, iter), key_f);
  7526. jsonenc_scalar(e, upb_mapiter_value(map, iter), val_f);
  7527. }
  7528. }
  7529. jsonenc_putstr(e, "}");
  7530. }
  7531. static void jsonenc_fieldval(jsonenc *e, const upb_fielddef *f,
  7532. upb_msgval val, bool *first) {
  7533. const char *name;
  7534. if (e->options & UPB_JSONENC_PROTONAMES) {
  7535. name = upb_fielddef_name(f);
  7536. } else {
  7537. name = upb_fielddef_jsonname(f);
  7538. }
  7539. jsonenc_putsep(e, ",", first);
  7540. jsonenc_printf(e, "\"%s\":", name);
  7541. if (upb_fielddef_ismap(f)) {
  7542. jsonenc_map(e, val.map_val, f);
  7543. } else if (upb_fielddef_isseq(f)) {
  7544. jsonenc_array(e, val.array_val, f);
  7545. } else {
  7546. jsonenc_scalar(e, val, f);
  7547. }
  7548. }
  7549. static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg,
  7550. const upb_msgdef *m) {
  7551. upb_msgval val;
  7552. const upb_fielddef *f;
  7553. bool first = true;
  7554. if (e->options & UPB_JSONENC_EMITDEFAULTS) {
  7555. /* Iterate over all fields. */
  7556. int i = 0;
  7557. int n = upb_msgdef_fieldcount(m);
  7558. for (i = 0; i < n; i++) {
  7559. f = upb_msgdef_field(m, i);
  7560. if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) {
  7561. jsonenc_fieldval(e, f, upb_msg_get(msg, f), &first);
  7562. }
  7563. }
  7564. } else {
  7565. /* Iterate over non-empty fields. */
  7566. size_t iter = UPB_MSG_BEGIN;
  7567. while (upb_msg_next(msg, m, e->ext_pool, &f, &val, &iter)) {
  7568. jsonenc_fieldval(e, f, val, &first);
  7569. }
  7570. }
  7571. }
  7572. static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) {
  7573. jsonenc_putstr(e, "{");
  7574. jsonenc_msgfields(e, msg, m);
  7575. jsonenc_putstr(e, "}");
  7576. }
  7577. static size_t jsonenc_nullz(jsonenc *e, size_t size) {
  7578. size_t ret = e->ptr - e->buf + e->overflow;
  7579. if (size > 0) {
  7580. if (e->ptr == e->end) e->ptr--;
  7581. *e->ptr = '\0';
  7582. }
  7583. return ret;
  7584. }
  7585. size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m,
  7586. const upb_symtab *ext_pool, int options, char *buf,
  7587. size_t size, upb_status *status) {
  7588. jsonenc e;
  7589. e.buf = buf;
  7590. e.ptr = buf;
  7591. e.end = buf + size;
  7592. e.overflow = 0;
  7593. e.options = options;
  7594. e.ext_pool = ext_pool;
  7595. e.status = status;
  7596. e.arena = NULL;
  7597. if (setjmp(e.err)) return -1;
  7598. jsonenc_msgfield(&e, msg, m);
  7599. if (e.arena) upb_arena_free(e.arena);
  7600. return jsonenc_nullz(&e, size);
  7601. }
  7602. /* See port_def.inc. This should #undef all macros #defined there. */
  7603. #undef UPB_MAPTYPE_STRING
  7604. #undef UPB_SIZE
  7605. #undef UPB_PTR_AT
  7606. #undef UPB_READ_ONEOF
  7607. #undef UPB_WRITE_ONEOF
  7608. #undef UPB_INLINE
  7609. #undef UPB_ALIGN_UP
  7610. #undef UPB_ALIGN_DOWN
  7611. #undef UPB_ALIGN_MALLOC
  7612. #undef UPB_ALIGN_OF
  7613. #undef UPB_FORCEINLINE
  7614. #undef UPB_NOINLINE
  7615. #undef UPB_NORETURN
  7616. #undef UPB_MAX
  7617. #undef UPB_MIN
  7618. #undef UPB_UNUSED
  7619. #undef UPB_ASSUME
  7620. #undef UPB_ASSERT
  7621. #undef UPB_UNREACHABLE
  7622. #undef UPB_POISON_MEMORY_REGION
  7623. #undef UPB_UNPOISON_MEMORY_REGION
  7624. #undef UPB_ASAN