From 090082cc5c809f90d713a73ec5a0b16fbe2388f6 Mon Sep 17 00:00:00 2001 From: Andrews Arokiam Date: Thu, 12 Oct 2023 18:33:15 +0530 Subject: [PATCH 1/7] Added example for torchserve grpc v1 and v2. Signed-off-by: Andrews Arokiam --- .gitignore | 1 + .../modelserving/v1beta1/torchserve/README.md | 237 ++++++ .../v1beta1/torchserve/inference_pb2.py | 37 + .../v1beta1/torchserve/inference_pb2_grpc.py | 136 +++ .../v1beta1/torchserve/management_pb2.py | 40 + .../v1beta1/torchserve/management_pb2_grpc.py | 237 ++++++ .../v1beta1/torchserve/mnist_grpc.yaml | 14 + .../v1beta1/torchserve/mnist_grpc_v2.yaml | 15 + .../torchserve/mnist_v2_grpc_tensor.json | 803 ++++++++++++++++++ .../torchserve/proto/v1/inference.proto | 39 + .../torchserve/proto/v1/management.proto | 119 +++ .../torchserve/proto/v2/grpc_predict_v2.proto | 362 ++++++++ .../torchserve/torchserve_grpc_client.py | 178 ++++ 13 files changed, 2218 insertions(+) create mode 100644 docs/modelserving/v1beta1/torchserve/inference_pb2.py create mode 100644 docs/modelserving/v1beta1/torchserve/inference_pb2_grpc.py create mode 100644 docs/modelserving/v1beta1/torchserve/management_pb2.py create mode 100644 docs/modelserving/v1beta1/torchserve/management_pb2_grpc.py create mode 100644 docs/modelserving/v1beta1/torchserve/mnist_grpc.yaml create mode 100644 docs/modelserving/v1beta1/torchserve/mnist_grpc_v2.yaml create mode 100644 docs/modelserving/v1beta1/torchserve/mnist_v2_grpc_tensor.json create mode 100644 docs/modelserving/v1beta1/torchserve/proto/v1/inference.proto create mode 100644 docs/modelserving/v1beta1/torchserve/proto/v1/management.proto create mode 100644 docs/modelserving/v1beta1/torchserve/proto/v2/grpc_predict_v2.proto create mode 100644 docs/modelserving/v1beta1/torchserve/torchserve_grpc_client.py diff --git a/.gitignore b/.gitignore index 2be1b2764..0a46d0d09 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ # python .venv +*.pyc # editor .vscode diff --git 
a/docs/modelserving/v1beta1/torchserve/README.md b/docs/modelserving/v1beta1/torchserve/README.md index 3217931c6..732e7966b 100644 --- a/docs/modelserving/v1beta1/torchserve/README.md +++ b/docs/modelserving/v1beta1/torchserve/README.md @@ -202,6 +202,114 @@ curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1 {"explanations": [[[[0.0005394675730469475, -0.0022280013123036043, -0.003416480100841055, -0.0051329881112415965, -0.009973864160829985, -0.004112560908882716, -0.009223458030656112, -0.0006676354577291628, -0.005249806664413386, -0.0009790519227372953, -0.0026914653993121195, -0.0069470097151383995, -0.00693530415962956, -0.005973878697847718, -0.00425042437288857, 0.0032867281838150977, -0.004297780258633562, -0.005643196661192014, -0.00653025019738562, -0.0047062916121001185, -0.0018656628277792628, -0.0016757477204072532, -0.0010410417081844845, -0.0019093520822156726, -0.004451403461006374, -0.0008552767257773671, -0.0027638888169885267, -0.0], [0.006971297052106784, 0.007316855222185687, 0.012144494329150574, 0.011477799383288441, 0.006846725347670252, 0.01149386176451476, 0.0045351987881190655, 0.007038361889638708, 0.0035855377023272157, 0.003031419502053957, -0.0008611575226775316, -0.0011085224745969223, -0.0050840743637658534, 0.009855491784340777, 0.007220680811043034, 0.011374285598070253, 0.007147725481709019, 0.0037114580912849457, 0.00030763245479291384, 0.0018305492665953394, 0.010106224395114147, 0.012932881164284687, 0.008862892007714321, 0.0070960526615982435, -0.0015931137903787505, 0.0036495747329455906, 0.0002593849391051298, -0.0], [0.006467265785857396, -0.00041793201228071674, 0.004900316089756856, 0.002308395474823997, 0.007859295399592283, 0.003916404948969494, 0.005630750246437249, 0.0043712538044184375, 0.006128530599133763, -0.009446321309831246, -0.014173645867037036, -0.0062988650915794565, -0.011473838941118539, -0.009049151947644047, -0.0007625645864610934, -0.013721416630061238, 
-0.0005580156670410108, 0.0033404383756480784, -0.006693278798487951, -0.003705084551144756, 0.005100375089529131, 5.5276874714401074e-05, 0.007221745280359063, -0.00573598303916232, -0.006836169033785967, 0.0025401608627538936, 9.303533912921196e-05, -0.0], [0.005914399808621816, 0.00452643561023696, 0.003968242261515448, 0.010422786058967673, 0.007728358107899074, 0.01147115923288383, 0.005683869479056691, 0.011150670502307374, 0.008742555292485278, 0.0032882897575743754, 0.014841138421861584, 0.011741228362482451, 0.0004296862879259221, -0.0035118140680654854, -0.006152254410078331, -0.004925121936901983, -2.3611205202801947e-06, 0.029347073037039074, 0.02901626308947743, 0.023379353021343398, 0.004027157620197582, -0.01677662249919171, -0.013497255736128979, 0.006957482854214602, 0.0018321766800746145, 0.008277034396684563, 0.002733405455464871, -0.0], [0.0049579739156640065, -0.002168016158233997, 0.0020644317321723642, 0.0020912464240293825, 0.004719691119907336, 0.007879231202446626, 0.010594445898145937, 0.006533067778982801, 0.002290214592708113, -0.0036651114968251986, 0.010753227423379443, 0.006402706020466243, -0.047075193909339695, -0.08108259303568185, -0.07646875196692542, -0.1681834845371156, -0.1610307396135756, -0.12010309927453829, -0.016148831320070896, -0.009541525999486027, 0.04575604594761406, 0.031470966329886635, 0.02452149438024385, 0.016594078577569567, 0.012213591301610382, -0.002230875840404426, 0.0036704051254298374, -0.0], [0.006410107592414739, 0.005578283890924384, 0.001977103461731095, 0.008935476507124939, 0.0011305055729953436, 0.0004946313900665659, -0.0040266029554395935, -0.004270765544167256, -0.010832150944943138, -0.01653511868336456, -0.011121302103373972, -0.42038514526905024, -0.22874576003118394, -0.16752936178907055, -0.17021699697722079, -0.09998584936787697, -0.09041117495322142, -0.10230248444795721, -0.15260897522094888, 0.07770835838531896, -0.0813761125123066, 0.027556910053932963, 0.036305965104261866, 
0.03407793793894619, 0.01212761779302579, 0.006695133380685627, 0.005331392748588556, -0.0], [0.008342680065996267, -0.00029249776150416367, 0.002782130291086583, 0.0027793744856745373, 0.0020525102690845407, 0.003679269934110004, 0.009373846012918791, -0.0031751745946300403, -0.009042846256743316, 0.0074141593032070775, -0.02796812516561052, -0.593171583786029, -0.4830164472795136, -0.353860128479443, -0.256482708704862, 0.11515586314578445, 0.12700563162828346, 0.0022342450630152204, -0.24673707669992118, -0.012878340813781437, 0.16866821780196756, 0.009739033161051434, -0.000827843726513152, -0.0002137320694585577, -0.004179480126338929, 0.008454049232317358, -0.002767934266266998, -0.0], [0.007070382982749552, 0.005342127805750565, -0.000983984198542354, 0.007910101170274493, 0.001266267696096404, 0.0038575136843053844, 0.006941130321773131, -0.015195182020687892, -0.016954974010578504, -0.031186444096787943, -0.031754626467747966, 0.038918845112017694, 0.06248943950328597, 0.07703301092601872, 0.0438493628024275, -0.0482404449771698, -0.08718650815999045, -0.0014764704694506415, -0.07426336448916614, -0.10378029666564882, 0.008572087846793842, -0.00017173413848283343, 0.010058893270893113, 0.0028410498666004377, 0.002008290211806285, 0.011905375389931099, 0.006071375802943992, -0.0], [0.0076080165949142685, -0.0017127333725310495, 0.00153128150106188, 0.0033391793764531563, 0.005373442509691564, 0.007207746020295443, 0.007422946703693544, -0.00699779191449194, 0.002395328253696969, -0.011682618874195954, -0.012737004464649057, -0.05379966383523857, -0.07174960461749053, -0.03027341304050314, 0.0019411862216381327, -0.0205575129473766, -0.04617091711614171, -0.017655308106959804, -0.009297162816368814, -0.03358572117988279, -0.1626068444778013, -0.015874364762085157, -0.0013736074085577258, -0.014763439328689378, 0.00631805792697278, 0.0021769414283267273, 0.0023061635006792498, -0.0], [0.005569931813561535, 0.004363218328087518, 0.00025609463218383973, 
0.009577483244680675, 0.007257755916229399, 0.00976284778532342, -0.006388840235419147, -0.009017880790555707, -0.015308709334434867, -0.016743935775597355, -0.04372596546189275, -0.03523469356755156, -0.017257810114846107, 0.011960489902313411, 0.01529079831828911, -0.020076559119468443, -0.042792547669901516, -0.0029492027218867116, -0.011109560582516062, -0.12985858077848939, -0.2262858575494602, -0.003391725540087574, -0.03063368684328981, -0.01353486587575121, 0.0011140822443932317, 0.006583451102528798, 0.005667533945285076, -0.0], [0.004056272267155598, -0.0006394041203204911, 0.004664893926197093, 0.010593032387298614, 0.014750931538689989, 0.015428721146282149, 0.012167820222401367, 0.017604752451202518, 0.01038886849969188, 0.020544326931163263, -0.0004206566917812794, -0.0037463581359232674, -0.0024656693040735075, 0.0026061897697624353, -0.05186055271869177, -0.09158655048397382, 0.022976389912563913, -0.19851635458461808, -0.11801281807622972, -0.29127727790584423, -0.017138655663803876, -0.04395515676468641, -0.019241432506341576, 0.0011342298743447392, 0.0030625771422964584, -0.0002867924892991192, -0.0017908808807543712, -0.0], [0.0030114260660488892, 0.0020246448273580006, -0.003293361220376816, 0.0036965043883218584, 0.00013185761728146236, -0.004355610866966878, -0.006432601921104354, -0.004148701459814858, 0.005974553907915845, -0.0001399233607281906, 0.010392944122965082, 0.015693249298693028, 0.0459528427528407, -0.013921539948093455, -0.06615556518538708, 0.02921438991320325, -0.16345220625101778, -0.002130491295590408, -0.11449749664916867, -0.030980255589300607, -0.04804122537359171, -0.05144994776295644, 0.005122827412776085, 0.006464862173908011, 0.008624278272940246, 0.0037316228508156427, 0.0036947794337026706, -0.0], [0.0038173843228389405, -0.0017091931226819494, -0.0030871869816778068, 0.002115642501535999, -0.006926441921580917, -0.003023077828426468, -0.014451359520861637, -0.0020793048380231397, -0.010948003939342523, 
-0.0014460716966395166, -0.01656990336897737, 0.003052317148320358, -0.0026729564809943513, -0.06360067057346147, 0.07780985635080599, -0.1436689936630281, -0.040817177623437874, -0.04373367754296477, -0.18337299150349698, 0.025295182977407064, -0.03874921104331938, -0.002353901742617205, 0.011772560401335033, 0.012480994515707569, 0.006498422579824301, 0.00632320984076023, 0.003407169765754805, -0.0], [0.00944355257990139, 0.009242583578688485, 0.005069860444386138, 0.012666191449103024, 0.00941789912565746, 0.004720427012836104, 0.007597687789204113, 0.008679266528089945, 0.00889322771021875, -0.0008577904940828809, 0.0022973860384607604, 0.025328230809207493, -0.09908781123080951, -0.07836626399832172, -0.1546141264726177, -0.2582207272050766, -0.2297524599578219, -0.29561835103416967, 0.12048787956671528, -0.06279365699861471, -0.03832012404275233, 0.022910264999199934, 0.005803508497672737, -0.003858461926053348, 0.0039451232171312765, 0.003858476747495933, 0.0013034515558609956, -0.0], [0.009725756015628606, -0.0004001101998876524, 0.006490722835571152, 0.00800808023631959, 0.0065880711806331265, -0.0010264326176194034, -0.0018914305972878344, -0.008822522194658438, -0.016650520788128117, -0.03254382594389507, -0.014795713101569494, -0.05826499837818885, -0.05165369567511702, -0.13384277337594377, -0.22572641373340493, -0.21584739544668635, -0.2366836351939208, 0.14937824076489659, -0.08127414932170171, -0.06720440139736879, -0.0038552732903526744, 0.0107597891707803, -5.67453590118174e-05, 0.0020161340511396244, -0.000783322694907436, -0.0006397207517995289, -0.005291639205010064, -0.0], [0.008627543242777584, 0.007700097300051849, 0.0020430960246806138, 0.012949015733198586, 0.008428709579953574, 0.001358177022953576, 0.00421863939925833, 0.002657580000868709, -0.007339431957237175, 0.02008439775442315, -0.0033717631758033114, -0.05176633249899187, -0.013790328758662772, -0.39102366157050594, -0.167341447585844, -0.04813367828213947, 0.1367781582239039, 
-0.04672809260566293, -0.03237784669978756, 0.03218068777925178, 0.02415063765016493, -0.017849899351200002, -0.002975675228088795, -0.004819438014786686, 0.005106898651831245, 0.0024278620704227456, 6.784303333368138e-05, -0.0], [0.009644258527009343, -0.001331907219439711, -0.0014639718434477777, 0.008481926798958248, 0.010278031715467508, 0.003625808326891529, -0.01121188617599796, -0.0010634587872994379, -0.0002603820881968461, -0.017985648016990465, -0.06446652745470374, 0.07726063173046191, -0.24739929795334742, -0.2701855018480216, -0.08888614776216278, 0.1373325760136816, -0.02316068912438066, -0.042164834956711514, 0.0009266091344106458, 0.03141872420427644, 0.011587728430225652, 0.0004755143243520787, 0.005860642609620605, 0.008979633931394438, 0.005061734169974005, 0.003932710387086098, 0.0015489986106803626, -0.0], [0.010998736164377534, 0.009378969800902604, 0.00030577045264713074, 0.0159329353530375, 0.014849508018911006, -0.0026513365659554225, 0.002923303082126996, 0.01917908707828847, -0.02338288107991566, -0.05706674679291175, 0.009526265752669624, -0.19945255386401284, -0.10725519695909647, -0.3222906835083537, -0.03857038318412844, -0.013279804965996065, -0.046626023244262085, -0.029299060237210447, -0.043269580558906555, -0.03768510002290657, -0.02255977771908117, -0.02632588166863199, -0.014417349488098566, -0.003077271951572957, -0.0004973277708010661, 0.0003475839139671271, -0.0014522783025903258, -0.0], [0.012215315671616316, -0.001693194176229889, 0.011365785434529038, 0.0036964574178487792, -0.010126738168635003, -0.025554378647710443, 0.006538003839811914, -0.03181759044467965, -0.016424751042854728, 0.06177539736110035, -0.43801735323216856, -0.29991040815937386, -0.2516019795363623, 0.037789523540809, -0.010948746374759491, -0.0633901687126727, -0.005976006160777705, 0.006035133605976937, -0.04961632526071937, -0.04142116972831476, -0.07558952727782252, -0.04165176179187153, -0.02021603856619006, -0.0027365663096057032, 
-0.011145473712733575, 0.0003566937349350848, -0.00546472985268321, -0.0], [0.008009386447317503, 0.006831207743885825, 0.0051306149795546365, 0.016239014770865052, 0.020925441734273218, 0.028344800173195076, -0.004805080609285047, -0.01880521614501033, -0.1272329010865855, -0.39835936819190537, -0.09113694760349819, -0.04061591094832608, -0.12677021961235907, 0.015567707226741051, -0.005615051546243333, -0.06454044862001587, 0.0195457674752272, -0.04219686517155871, -0.08060569979524296, 0.027234494361702787, -0.009152881336047056, -0.030865118003992217, -0.005770311060090559, 0.002905833371986098, 5.606663556872091e-05, 0.003209538083839772, -0.0018588810743365345, -0.0], [0.007587008852984699, -0.0021213639853557625, 0.0007709558092903736, 0.013883256128746423, 0.017328713012428214, 0.03645357525636198, -0.04043993335238427, 0.05730125171252314, -0.2563293727512057, -0.11438826083879326, 0.02662382809034687, 0.03525271352483709, 0.04745678120172762, 0.0336360484090392, -0.002916635707204059, -0.17950855098650784, -0.44161773297052964, -0.4512180227831197, -0.4940283106297913, -0.1970108671285798, 0.04344323143078066, -0.012005120444897523, 0.00987576109166055, -0.0018336757466252476, 0.0004913959502151706, -0.0005409724034216215, -0.005039223900868212, -0.0], [0.00637876531169957, 0.005189469227685454, 0.0007676355246000376, 0.018378100865097655, 0.015739815031394887, -0.035524983116512455, 0.03781006978038308, 0.28859052096740495, 0.0726464110153121, -0.026768468497420147, 0.06278766200288134, 0.17897045813699355, -0.13780371920803108, -0.14176458123649577, -0.1733103177731656, -0.3106508869296763, 0.04788355140275794, 0.04235327890285105, -0.031266625292514394, -0.016263819217960652, -0.031388328800811355, -0.01791363975905968, -0.012025067979443894, 0.008335083985905805, -0.0014386677797296231, 0.0055376544652972854, 0.002241522815466253, -0.0], [0.007455256326741617, -0.0009475207572210404, 0.0020288385162615286, 0.015399640135796092, 0.021133843188103074, 
-0.019846405097622234, -0.003162485751163173, -0.14199005055318842, -0.044200898667146035, -0.013395459413208084, 0.11019680479230103, -0.014057216041764874, -0.12553853334447865, -0.05992513534766256, 0.06467942189539834, 0.08866056095907732, -0.1451321508061849, -0.07382491447758655, -0.046961739981080476, 0.0008943713493160624, 0.03231044103656507, 0.00036034241706501196, -0.011387669277619417, -0.00014602449257226195, -0.0021863729003374116, 0.0018817840156005856, 0.0037909804578166286, -0.0], [0.006511855618626698, 0.006236866054439829, -0.001440571166157676, 0.012795776609942026, 0.011530545030403624, 0.03495489377257363, 0.04792403136095304, 0.049378583599065225, 0.03296101702085617, -0.0005351385876652296, 0.017744115897640366, 0.0011656622496764954, 0.0232845869823761, -0.0561191397060232, -0.02854070511118366, -0.028614174047247348, -0.007763531086362863, 0.01823079560098924, 0.021961392405283622, -0.009666681805706179, 0.009547046884328725, -0.008729943263791338, 0.006408909680578429, 0.009794327096359952, -0.0025825219195515304, 0.007063559189211571, 0.007867244119267047, -0.0], [0.007936663546039311, -0.00010710180170593153, 0.002716512705673228, 0.0038633557307721487, -0.0014877316616940372, -0.0004788143065635909, 0.012508842248031202, 0.0045381104608414645, -0.010650910516128294, -0.013785341529644855, -0.034287643221318206, -0.022152707546335495, -0.047056481347685974, -0.032166744564720455, -0.021551611335278546, -0.002174962503376043, 0.024344287130424306, 0.015579272560525105, 0.010958169741952194, -0.010607232913436921, -0.005548369726118836, -0.0014630046444242706, 0.013144180105016433, 0.0031349366359021916, 0.0010984887428255974, 0.005426941473328394, 0.006566511860044785, -0.0], [0.0005529184874606495, 0.00026139355020588705, -0.002887623443531047, 0.0013988462990850632, 0.00203365139495493, -0.007276926701775218, -0.004010419939595932, 0.017521952161185662, 0.0006996977433557911, 0.02083134683611201, 0.013690533534289498, 
-0.005466724359976675, -0.008857712321334327, 0.017408578822635818, 0.0076439343049154425, 0.0017861314923539985, 0.007465865707523924, 0.008034420825988495, 0.003976298558337994, 0.00411970637898539, -0.004572592545819698, 0.0029563907011979935, -0.0006382227820088148, 0.0015153753877889707, -0.0052626601797995595, 0.0025664706985019416, 0.005161751034260073, -0.0], [0.0009424280561998445, -0.0012942360298110595, 0.0011900868416523343, 0.000984424113178899, 0.0020988269382781564, -0.005870080062890889, -0.004950484744457169, 0.003117643454332697, -0.002509563565777083, 0.005831604884101081, 0.009531085216183116, 0.010030206821909806, 0.005858190171099734, 4.9344529936340524e-05, -0.004027895832421331, 0.0025436439920587606, 0.00531153867563076, 0.00495942692369508, 0.009215148318606382, 0.00010011928317543458, 0.0060051362999805355, -0.0008195376963202741, 0.0041728603512658224, -0.0017597169567888774, -0.0010577007775543158, 0.00046033327178068433, -0.0007674196306044449, -0.0], [-0.0, -0.0, 0.0013386963856532302, 0.00035183178922260837, 0.0030610334903526204, 8.951834979315781e-05, 0.0023676793550483524, -0.0002900551076915047, -0.00207019445286608, -7.61697478482574e-05, 0.0012150086715244216, 0.009831239281792168, 0.003479667642621962, 0.0070584324334114525, 0.004161851261339585, 0.0026146296354490665, -9.194746959222099e-05, 0.0013583866966571571, 0.0016821551239318913, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0]]]]} ``` +## Deploy PyTorch Model with V1 GRPC Protocol + +_**Note**_: Since kserve has no grpc client methods for v1, we are using torchserve's grpc v1 client. +### Create the InferenceService + +For deploying the `InferenceService` with GRPC protocol you need to expose the GRPC port on InferenceService. Here **7070** is the torchserve GRPC port. + +Apply the following [mnist_grpc.yaml](./mnist_grpc.yaml) to create the `InferenceService`. 
+ +=== "kubectl" +```bash +kubectl apply -f mnist_grpc.yaml +``` + +Expected Output + +```bash +$inferenceservice.serving.kserve.io/torchserve-grpc created +``` + +=== "Old Schema" + + ```yaml + apiVersion: serving.kserve.io/v1beta1 + kind: InferenceService + metadata: + name: "torchserve-grpc" + spec: + predictor: + pytorch: + storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v1 + ports: + - containerPort: 7070 + name: h2c + protocol: TCP + ``` + +=== "New Schema" + + ```yaml + apiVersion: "serving.kserve.io/v1beta1" + kind: "InferenceService" + metadata: + name: "torchserve-grpc" + spec: + predictor: + model: + modelFormat: + name: pytorch + storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v1 + ports: + - containerPort: 7070 + name: h2c + protocol: TCP + ``` + +### Run Inference with GRPC protocol for torchserve grpc v1 + +#### Install GRPC python dependencies + +```bash +pip install -U grpcio protobuf grpcio-tools +``` + +#### Download torchserve's inference and management proto + + +```bash +mkdir -p proto/v1 + +INFERENCE_PROTO_FILE_PATH=https://raw.githubusercontent.com/pytorch/serve/master/frontend/server/src/main/resources/proto/inference.proto +MANAGEMENT_PROTO_FILE_PATH=https://raw.githubusercontent.com/pytorch/serve/master/frontend/server/src/main/resources/proto/management.proto + +curl -s -L ${INFERENCE_PROTO_FILE_PATH} > ./proto/v1/inference.proto +curl -s -L ${MANAGEMENT_PROTO_FILE_PATH} > ./proto/v1/management.proto +``` + +#### Generate python GRPC client stub using the proto files + +```bash +python -m grpc_tools.protoc --proto_path=proto/v1/ --python_out=. --grpc_python_out=. 
proto/v1/inference.proto proto/v1/management.proto +``` +#### Run GRPC Inference + +You can use [image converter](https://github.com/kserve/kserve/tree/master/docs/samples/v1beta1/torchserve/v1/imgconv) to convert the images to base64 byte array, for other models please refer to [input request](https://github.com/pytorch/serve/tree/master/kubernetes/kserve/kf_request_json). + +Use this [`mnist.json`](./mnist.json) for sample prediction input. + +Refer the following [`torchserve_grpc_client.py`](./torchserve_grpc_client.py) python script to make torchserve v1 GRPC call. + +```bash + +MODEL_NAME=mnist +INPUT_PATH=mnist.json +SERVICE_HOSTNAME=$(kubectl get inferenceservice torchserve-grpc -o jsonpath='{.status.url}' | cut -d "/" -f 3) + +python torchserve_grpc_client.py --api_name infer --model $MODEL_NAME --input_path $INPUT_PATH --hostname $SERVICE_HOSTNAME +``` + +!!! success "Expected Output" +```bash +{ + "predictions": [ + 2 + ] +} +``` + ## Deploy PyTorch model with V2 REST Protocol ### Create the InferenceService @@ -296,6 +404,135 @@ curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v2 725422148614e-05, 0.0014516114512869852, 0.0002827701966546988, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0014401407633627265, 0.0023812497776698745, 0.002146825301700187, -0.0, -0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0011500529125940918, 0.0002865015572973405, 0.0029798151042282686, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0017750295500283872, 0.0008339859126060243, -0.00377073933577687, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0006093176894575109, -0.00046905787892409935, 0.0034053218511795034, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0007450011768391558, 0.001298767372877851, -0.008499247640112315, -6.145166131400234e-05, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0011809726042792137, -0.001838476328106708, 
0.00541110661116898, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.002139234224224006, 0.0003259163407641124, -0.005276118873855287, -0.001950984007438105, -9.545670742026532e-07, 0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0007772404228681039, -0.0001517956264720738, 0.0064814848131711815, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 8.098064985902114e-05, -0.00249042660692983, -0.0020718619200672302, -5.341117902942147e-05, -0.00045564724429915073, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0022750983476959733, 0.0017164060958460778, 0.0003221344707738082, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0015560282678744543, 9.107238495871273e-05, 0.0008772841497928399, 0.0006502978626355868, -0.004128780767525651, 0.0006030386900152659, 0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.001395995791096219, 0.0026791526689584344, 0.0023995008266391488, -0.0004496096312746451, 0.003101832450753724, 0.007494536066960778, 0.0028641187148287965, -0.0030525907182629075, 0.003420222396518567, 0.0014924018363498125, -0.0009357388301326025, 0.0007856228933169799, -0.0018433973914981437, 1.6031856831240914e-05, 0.0, 0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0006999018502034005, 0.004382250870697946, -0.0035419313267119365, -0.0028896748092595375, -0.00048734542493666705, -0.0060873452419295, 0.000388224990424471, 0.002533641537585585, -0.004352836563597573, -0.0006079418766875505, -0.0038101334053377753, -0.000828441340357984, 0.0, -0.0, 0.0, 0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0010901530866342661, -0.013135008038845744, 0.0004734518707654666, 0.002050423283568135, -0.006609451922460863, 0.0023647861820124366, 0.0046789204256194, -0.0018122527412311837, 0.002137538353955849, 0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
-0.0, -0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}]} ``` +## Deploy PyTorch Model with V2 GRPC Protocol + + +### Create the InferenceService + +For deploying the `InferenceService` with GRPC protocol you need to expose the GRPC port on InferenceService. Here **8081** is kserve GRPC port. + +Apply the following [mnist_grpc_v2.yaml](./mnist_grpc_v2.yaml) to create the `InferenceService`. + +=== "kubectl" +```bash +kubectl apply -f mnist_grpc_v2.yaml +``` + +Expected Output + +```bash +$inferenceservice.serving.kserve.io/torchserve-grpc-v2 created +``` + +=== "Old Schema" + + ```yaml + apiVersion: serving.kserve.io/v1beta1 + kind: InferenceService + metadata: + name: "torchserve-grpc-v2" + spec: + predictor: + pytorch: + protocolVersion: v2 + storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v2 + ports: + - containerPort: 8081 + name: h2c + protocol: TCP + ``` + +=== "New Schema" + + ```yaml + apiVersion: serving.kserve.io/v1beta1 + kind: InferenceService + metadata: + name: "torchserve-grpc-v2" + spec: + predictor: + model: + modelFormat: + name: pytorch + protocolVersion: v2 + storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v2 + ports: + - containerPort: 8081 + name: h2c + protocol: TCP + ``` + + +### Download kserve's grpc inference proto + +```bash +mkdir -p proto/v2 + +PROTO_FILE_PATH=https://raw.githubusercontent.com/kserve/kserve/master/python/kserve/kserve/protocol/grpc/grpc_predict_v2.proto + +curl -s -L ${PROTO_FILE_PATH} > ./proto/v2/grpc_predict_v2.proto +``` 
+ +### Run Model Inference + +The first step is to [determine the ingress IP and ports](../../../get_started/first_isvc.md#4-determine-the-ingress-ip-and-ports) and set `INGRESS_HOST` and `INGRESS_PORT`. + +```bash +INPUT_PATH=./mnist_v2_grpc_tensor.json +PROTO_FILE=proto/v2/grpc_predict_v2.proto +SERVICE_HOSTNAME=$(kubectl get inferenceservice torchserve-grpc-v2 -o jsonpath='{.status.url}' | cut -d "/" -f 3) +``` + +### Make grpc call + +```bash +grpcurl -v -plaintext -proto ${PROTO_FILE} -authority ${SERVICE_HOSTNAME} -d @ ${INGRESS_HOST}:${INGRESS_PORT} inference.GRPCInferenceService.ModelInfer <<< $(cat "$INPUT_PATH") +``` + + +!!! success "Expected Output" + +```bash +Resolved method descriptor: +// The ModelInfer API performs inference using the specified model. Errors are +// indicated by the google.rpc.Status returned for the request. The OK code +// indicates success and other codes indicate failure. +rpc ModelInfer ( .inference.ModelInferRequest ) returns ( .inference.ModelInferResponse ); + +Request metadata to send: +(empty) + +Response headers received: +content-type: application/grpc +date: Wed, 11 Oct 2023 13:36:30 GMT +grpc-accept-encoding: identity, deflate, gzip +server: istio-envoy +x-envoy-upstream-service-time: 581 + +Response contents: +{ + "modelName": "mnist", + "id": "d3b15cad-50a2-4eaf-80ce-8b0a428bd298", + "outputs": [ + { + "name": "input-0", + "datatype": "INT64", + "shape": [ + "1" + ], + "contents": { + "int64Contents": [ + "1" + ] + } + } + ] +} + +Response trailers received: +(empty) +Sent 1 request and received 1 response +``` ## Autoscaling One of the main serverless inference features is to automatically scale the replicas of an `InferenceService` matching the incoming workload. 
diff --git a/docs/modelserving/v1beta1/torchserve/inference_pb2.py b/docs/modelserving/v1beta1/torchserve/inference_pb2.py new file mode 100644 index 000000000..32068ed6f --- /dev/null +++ b/docs/modelserving/v1beta1/torchserve/inference_pb2.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: inference.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0finference.proto\x12 org.pytorch.serve.grpc.inference\x1a\x1bgoogle/protobuf/empty.proto\"\xbd\x01\n\x12PredictionsRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12N\n\x05input\x18\x03 \x03(\x0b\x32?.org.pytorch.serve.grpc.inference.PredictionsRequest.InputEntry\x1a,\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\"(\n\x12PredictionResponse\x12\x12\n\nprediction\x18\x01 \x01(\x0c\"*\n\x18TorchServeHealthResponse\x12\x0e\n\x06health\x18\x01 \x01(\t2\xf7\x02\n\x14InferenceAPIsService\x12\\\n\x04Ping\x12\x16.google.protobuf.Empty\x1a:.org.pytorch.serve.grpc.inference.TorchServeHealthResponse\"\x00\x12{\n\x0bPredictions\x12\x34.org.pytorch.serve.grpc.inference.PredictionsRequest\x1a\x34.org.pytorch.serve.grpc.inference.PredictionResponse\"\x00\x12\x83\x01\n\x11StreamPredictions\x12\x34.org.pytorch.serve.grpc.inference.PredictionsRequest\x1a\x34.org.pytorch.serve.grpc.inference.PredictionResponse\"\x00\x30\x01\x42\x02P\x01\x62\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, 
_globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'inference_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'P\001' + _PREDICTIONSREQUEST_INPUTENTRY._options = None + _PREDICTIONSREQUEST_INPUTENTRY._serialized_options = b'8\001' + _globals['_PREDICTIONSREQUEST']._serialized_start=83 + _globals['_PREDICTIONSREQUEST']._serialized_end=272 + _globals['_PREDICTIONSREQUEST_INPUTENTRY']._serialized_start=228 + _globals['_PREDICTIONSREQUEST_INPUTENTRY']._serialized_end=272 + _globals['_PREDICTIONRESPONSE']._serialized_start=274 + _globals['_PREDICTIONRESPONSE']._serialized_end=314 + _globals['_TORCHSERVEHEALTHRESPONSE']._serialized_start=316 + _globals['_TORCHSERVEHEALTHRESPONSE']._serialized_end=358 + _globals['_INFERENCEAPISSERVICE']._serialized_start=361 + _globals['_INFERENCEAPISSERVICE']._serialized_end=736 +# @@protoc_insertion_point(module_scope) diff --git a/docs/modelserving/v1beta1/torchserve/inference_pb2_grpc.py b/docs/modelserving/v1beta1/torchserve/inference_pb2_grpc.py new file mode 100644 index 000000000..70810d99b --- /dev/null +++ b/docs/modelserving/v1beta1/torchserve/inference_pb2_grpc.py @@ -0,0 +1,136 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc + +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 +import inference_pb2 as inference__pb2 + + +class InferenceAPIsServiceStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. 
+ """ + self.Ping = channel.unary_unary( + '/org.pytorch.serve.grpc.inference.InferenceAPIsService/Ping', + request_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + response_deserializer=inference__pb2.TorchServeHealthResponse.FromString, + ) + self.Predictions = channel.unary_unary( + '/org.pytorch.serve.grpc.inference.InferenceAPIsService/Predictions', + request_serializer=inference__pb2.PredictionsRequest.SerializeToString, + response_deserializer=inference__pb2.PredictionResponse.FromString, + ) + self.StreamPredictions = channel.unary_stream( + '/org.pytorch.serve.grpc.inference.InferenceAPIsService/StreamPredictions', + request_serializer=inference__pb2.PredictionsRequest.SerializeToString, + response_deserializer=inference__pb2.PredictionResponse.FromString, + ) + + +class InferenceAPIsServiceServicer(object): + """Missing associated documentation comment in .proto file.""" + + def Ping(self, request, context): + """Check health status of the TorchServe server. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def Predictions(self, request, context): + """Predictions entry point to get inference using default model version. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StreamPredictions(self, request, context): + """Streaming response for an inference request. 
+ """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_InferenceAPIsServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + 'Ping': grpc.unary_unary_rpc_method_handler( + servicer.Ping, + request_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + response_serializer=inference__pb2.TorchServeHealthResponse.SerializeToString, + ), + 'Predictions': grpc.unary_unary_rpc_method_handler( + servicer.Predictions, + request_deserializer=inference__pb2.PredictionsRequest.FromString, + response_serializer=inference__pb2.PredictionResponse.SerializeToString, + ), + 'StreamPredictions': grpc.unary_stream_rpc_method_handler( + servicer.StreamPredictions, + request_deserializer=inference__pb2.PredictionsRequest.FromString, + response_serializer=inference__pb2.PredictionResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'org.pytorch.serve.grpc.inference.InferenceAPIsService', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. 
+class InferenceAPIsService(object): + """Missing associated documentation comment in .proto file.""" + + @staticmethod + def Ping(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/org.pytorch.serve.grpc.inference.InferenceAPIsService/Ping', + google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + inference__pb2.TorchServeHealthResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def Predictions(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/org.pytorch.serve.grpc.inference.InferenceAPIsService/Predictions', + inference__pb2.PredictionsRequest.SerializeToString, + inference__pb2.PredictionResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StreamPredictions(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_stream(request, target, '/org.pytorch.serve.grpc.inference.InferenceAPIsService/StreamPredictions', + inference__pb2.PredictionsRequest.SerializeToString, + inference__pb2.PredictionResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/docs/modelserving/v1beta1/torchserve/management_pb2.py b/docs/modelserving/v1beta1/torchserve/management_pb2.py new file mode 100644 index 000000000..9e63dd787 --- /dev/null +++ 
b/docs/modelserving/v1beta1/torchserve/management_pb2.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: management.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10management.proto\x12!org.pytorch.serve.grpc.management\"!\n\x12ManagementResponse\x12\x0b\n\x03msg\x18\x01 \x01(\t\"U\n\x14\x44\x65scribeModelRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12\x12\n\ncustomized\x18\x03 \x01(\x08\";\n\x11ListModelsRequest\x12\r\n\x05limit\x18\x01 \x01(\x05\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\x05\"\xe2\x01\n\x14RegisterModelRequest\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12\x0f\n\x07handler\x18\x02 \x01(\t\x12\x17\n\x0finitial_workers\x18\x03 \x01(\x05\x12\x17\n\x0fmax_batch_delay\x18\x04 \x01(\x05\x12\x12\n\nmodel_name\x18\x05 \x01(\t\x12\x18\n\x10response_timeout\x18\x06 \x01(\x05\x12\x0f\n\x07runtime\x18\x07 \x01(\t\x12\x13\n\x0bsynchronous\x18\x08 \x01(\x08\x12\x0b\n\x03url\x18\t \x01(\t\x12\x12\n\ns3_sse_kms\x18\n \x01(\x08\"\xa1\x01\n\x12ScaleWorkerRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12\x12\n\nmax_worker\x18\x03 \x01(\x05\x12\x12\n\nmin_worker\x18\x04 \x01(\x05\x12\x12\n\nnumber_gpu\x18\x05 \x01(\x05\x12\x13\n\x0bsynchronous\x18\x06 \x01(\x08\x12\x0f\n\x07timeout\x18\x07 \x01(\x05\">\n\x11SetDefaultRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\"C\n\x16UnregisterModelRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 
\x01(\t2\xa0\x06\n\x15ManagementAPIsService\x12\x81\x01\n\rDescribeModel\x12\x37.org.pytorch.serve.grpc.management.DescribeModelRequest\x1a\x35.org.pytorch.serve.grpc.management.ManagementResponse\"\x00\x12{\n\nListModels\x12\x34.org.pytorch.serve.grpc.management.ListModelsRequest\x1a\x35.org.pytorch.serve.grpc.management.ManagementResponse\"\x00\x12\x81\x01\n\rRegisterModel\x12\x37.org.pytorch.serve.grpc.management.RegisterModelRequest\x1a\x35.org.pytorch.serve.grpc.management.ManagementResponse\"\x00\x12}\n\x0bScaleWorker\x12\x35.org.pytorch.serve.grpc.management.ScaleWorkerRequest\x1a\x35.org.pytorch.serve.grpc.management.ManagementResponse\"\x00\x12{\n\nSetDefault\x12\x34.org.pytorch.serve.grpc.management.SetDefaultRequest\x1a\x35.org.pytorch.serve.grpc.management.ManagementResponse\"\x00\x12\x85\x01\n\x0fUnregisterModel\x12\x39.org.pytorch.serve.grpc.management.UnregisterModelRequest\x1a\x35.org.pytorch.serve.grpc.management.ManagementResponse\"\x00\x42\x02P\x01\x62\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'management_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'P\001' + _globals['_MANAGEMENTRESPONSE']._serialized_start=55 + _globals['_MANAGEMENTRESPONSE']._serialized_end=88 + _globals['_DESCRIBEMODELREQUEST']._serialized_start=90 + _globals['_DESCRIBEMODELREQUEST']._serialized_end=175 + _globals['_LISTMODELSREQUEST']._serialized_start=177 + _globals['_LISTMODELSREQUEST']._serialized_end=236 + _globals['_REGISTERMODELREQUEST']._serialized_start=239 + _globals['_REGISTERMODELREQUEST']._serialized_end=465 + _globals['_SCALEWORKERREQUEST']._serialized_start=468 + _globals['_SCALEWORKERREQUEST']._serialized_end=629 + _globals['_SETDEFAULTREQUEST']._serialized_start=631 + _globals['_SETDEFAULTREQUEST']._serialized_end=693 + 
_globals['_UNREGISTERMODELREQUEST']._serialized_start=695 + _globals['_UNREGISTERMODELREQUEST']._serialized_end=762 + _globals['_MANAGEMENTAPISSERVICE']._serialized_start=765 + _globals['_MANAGEMENTAPISSERVICE']._serialized_end=1565 +# @@protoc_insertion_point(module_scope) diff --git a/docs/modelserving/v1beta1/torchserve/management_pb2_grpc.py b/docs/modelserving/v1beta1/torchserve/management_pb2_grpc.py new file mode 100644 index 000000000..3392e92b5 --- /dev/null +++ b/docs/modelserving/v1beta1/torchserve/management_pb2_grpc.py @@ -0,0 +1,237 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc + +import management_pb2 as management__pb2 + + +class ManagementAPIsServiceStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.DescribeModel = channel.unary_unary( + '/org.pytorch.serve.grpc.management.ManagementAPIsService/DescribeModel', + request_serializer=management__pb2.DescribeModelRequest.SerializeToString, + response_deserializer=management__pb2.ManagementResponse.FromString, + ) + self.ListModels = channel.unary_unary( + '/org.pytorch.serve.grpc.management.ManagementAPIsService/ListModels', + request_serializer=management__pb2.ListModelsRequest.SerializeToString, + response_deserializer=management__pb2.ManagementResponse.FromString, + ) + self.RegisterModel = channel.unary_unary( + '/org.pytorch.serve.grpc.management.ManagementAPIsService/RegisterModel', + request_serializer=management__pb2.RegisterModelRequest.SerializeToString, + response_deserializer=management__pb2.ManagementResponse.FromString, + ) + self.ScaleWorker = channel.unary_unary( + '/org.pytorch.serve.grpc.management.ManagementAPIsService/ScaleWorker', + request_serializer=management__pb2.ScaleWorkerRequest.SerializeToString, + 
response_deserializer=management__pb2.ManagementResponse.FromString, + ) + self.SetDefault = channel.unary_unary( + '/org.pytorch.serve.grpc.management.ManagementAPIsService/SetDefault', + request_serializer=management__pb2.SetDefaultRequest.SerializeToString, + response_deserializer=management__pb2.ManagementResponse.FromString, + ) + self.UnregisterModel = channel.unary_unary( + '/org.pytorch.serve.grpc.management.ManagementAPIsService/UnregisterModel', + request_serializer=management__pb2.UnregisterModelRequest.SerializeToString, + response_deserializer=management__pb2.ManagementResponse.FromString, + ) + + +class ManagementAPIsServiceServicer(object): + """Missing associated documentation comment in .proto file.""" + + def DescribeModel(self, request, context): + """Provides detailed information about the default version of a model. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ListModels(self, request, context): + """List registered models in TorchServe. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def RegisterModel(self, request, context): + """Register a new model in TorchServe. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ScaleWorker(self, request, context): + """Configure number of workers for a default version of a model.This is a asynchronous call by default. Caller need to call describeModel to check if the model workers has been changed. 
+ """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def SetDefault(self, request, context): + """Set default version of a model + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def UnregisterModel(self, request, context): + """Unregister the default version of a model from TorchServe if it is the only version available.This is a asynchronous call by default. Caller can call listModels to confirm model is unregistered + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_ManagementAPIsServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + 'DescribeModel': grpc.unary_unary_rpc_method_handler( + servicer.DescribeModel, + request_deserializer=management__pb2.DescribeModelRequest.FromString, + response_serializer=management__pb2.ManagementResponse.SerializeToString, + ), + 'ListModels': grpc.unary_unary_rpc_method_handler( + servicer.ListModels, + request_deserializer=management__pb2.ListModelsRequest.FromString, + response_serializer=management__pb2.ManagementResponse.SerializeToString, + ), + 'RegisterModel': grpc.unary_unary_rpc_method_handler( + servicer.RegisterModel, + request_deserializer=management__pb2.RegisterModelRequest.FromString, + response_serializer=management__pb2.ManagementResponse.SerializeToString, + ), + 'ScaleWorker': grpc.unary_unary_rpc_method_handler( + servicer.ScaleWorker, + request_deserializer=management__pb2.ScaleWorkerRequest.FromString, + response_serializer=management__pb2.ManagementResponse.SerializeToString, + ), + 'SetDefault': grpc.unary_unary_rpc_method_handler( + servicer.SetDefault, + request_deserializer=management__pb2.SetDefaultRequest.FromString, + 
response_serializer=management__pb2.ManagementResponse.SerializeToString, + ), + 'UnregisterModel': grpc.unary_unary_rpc_method_handler( + servicer.UnregisterModel, + request_deserializer=management__pb2.UnregisterModelRequest.FromString, + response_serializer=management__pb2.ManagementResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'org.pytorch.serve.grpc.management.ManagementAPIsService', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. +class ManagementAPIsService(object): + """Missing associated documentation comment in .proto file.""" + + @staticmethod + def DescribeModel(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/org.pytorch.serve.grpc.management.ManagementAPIsService/DescribeModel', + management__pb2.DescribeModelRequest.SerializeToString, + management__pb2.ManagementResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def ListModels(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/org.pytorch.serve.grpc.management.ManagementAPIsService/ListModels', + management__pb2.ListModelsRequest.SerializeToString, + management__pb2.ManagementResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def RegisterModel(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + 
metadata=None): + return grpc.experimental.unary_unary(request, target, '/org.pytorch.serve.grpc.management.ManagementAPIsService/RegisterModel', + management__pb2.RegisterModelRequest.SerializeToString, + management__pb2.ManagementResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def ScaleWorker(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/org.pytorch.serve.grpc.management.ManagementAPIsService/ScaleWorker', + management__pb2.ScaleWorkerRequest.SerializeToString, + management__pb2.ManagementResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def SetDefault(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/org.pytorch.serve.grpc.management.ManagementAPIsService/SetDefault', + management__pb2.SetDefaultRequest.SerializeToString, + management__pb2.ManagementResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def UnregisterModel(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/org.pytorch.serve.grpc.management.ManagementAPIsService/UnregisterModel', + management__pb2.UnregisterModelRequest.SerializeToString, + management__pb2.ManagementResponse.FromString, + options, channel_credentials, + insecure, 
call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/docs/modelserving/v1beta1/torchserve/mnist_grpc.yaml b/docs/modelserving/v1beta1/torchserve/mnist_grpc.yaml new file mode 100644 index 000000000..1ff960725 --- /dev/null +++ b/docs/modelserving/v1beta1/torchserve/mnist_grpc.yaml @@ -0,0 +1,14 @@ +apiVersion: "serving.kserve.io/v1beta1" +kind: "InferenceService" +metadata: + name: "torchserve-grpc" +spec: + predictor: + model: + modelFormat: + name: pytorch + storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v1 + ports: + - containerPort: 7070 + name: h2c + protocol: TCP diff --git a/docs/modelserving/v1beta1/torchserve/mnist_grpc_v2.yaml b/docs/modelserving/v1beta1/torchserve/mnist_grpc_v2.yaml new file mode 100644 index 000000000..0b38a9cef --- /dev/null +++ b/docs/modelserving/v1beta1/torchserve/mnist_grpc_v2.yaml @@ -0,0 +1,15 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: "torchserve-grpc-v2" +spec: + predictor: + model: + modelFormat: + name: pytorch + protocolVersion: v2 + storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v2 + ports: + - containerPort: 8081 + name: h2c + protocol: TCP diff --git a/docs/modelserving/v1beta1/torchserve/mnist_v2_grpc_tensor.json b/docs/modelserving/v1beta1/torchserve/mnist_v2_grpc_tensor.json new file mode 100644 index 000000000..5d1af14ce --- /dev/null +++ b/docs/modelserving/v1beta1/torchserve/mnist_v2_grpc_tensor.json @@ -0,0 +1,803 @@ +{ + "id": "d3b15cad-50a2-4eaf-80ce-8b0a428bd298", + "model_name": "mnist", + "inputs": [ + { + "name": "input-0", + "shape": [ + 1, + 28, + 28 + ], + "datatype": "FP32", + "contents": { + "fp32_contents": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 
0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.2392, + 0.0118, + 0.1647, + 0.4627, + 0.7569, + 0.4627, + 0.4627, + 0.2392, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0549, + 0.702, + 0.9608, + 0.9255, + 0.949, + 0.9961, + 0.9961, + 0.9961, + 0.9961, + 0.9608, + 0.9216, + 0.3294, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.5922, + 0.9961, + 0.9961, + 0.9961, + 0.8353, + 0.7529, + 0.698, + 0.698, + 0.7059, + 0.9961, + 0.9961, + 0.9451, + 0.1804, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.1686, + 0.9216, + 0.9961, + 0.8863, + 0.251, + 0.1098, + 0.0471, + 0.0, + 0.0, + 0.0078, + 0.502, + 0.9882, + 1.0, + 0.6784, + 0.0667, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.2196, + 0.9961, + 0.9922, + 0.4196, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.5255, + 0.9804, + 0.9961, + 0.2941, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.2471, + 0.9961, + 0.6196, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.8667, + 0.9961, + 0.6157, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.7608, + 0.9961, + 0.4039, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 
0.5882, + 0.9961, + 0.8353, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.1333, + 0.8627, + 0.9373, + 0.2275, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.3294, + 0.9961, + 0.8353, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.4941, + 0.9961, + 0.6706, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.3294, + 0.9961, + 0.8353, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.8392, + 0.9373, + 0.2353, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.3294, + 0.9961, + 0.8353, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.8392, + 0.7804, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.3294, + 0.9961, + 0.8353, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0431, + 0.8588, + 0.7804, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.3294, + 0.9961, + 0.8353, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.3843, + 0.9961, + 0.7804, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.6353, + 0.9961, + 0.8196, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.3843, + 0.9961, + 0.7804, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.2, + 0.9333, + 0.9961, + 0.2941, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.3843, + 0.9961, + 0.7804, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.2, + 0.6471, + 0.9961, + 0.7647, + 0.0157, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.2588, + 0.9451, + 0.7804, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0118, + 0.6549, + 0.9961, + 0.8902, + 0.2157, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 
0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.8392, + 0.8353, + 0.0784, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.1804, + 0.5961, + 0.7922, + 0.9961, + 0.9961, + 0.2471, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.8392, + 0.9961, + 0.8, + 0.7059, + 0.7059, + 0.7059, + 0.7059, + 0.7059, + 0.9216, + 0.9961, + 0.9961, + 0.9176, + 0.6118, + 0.0392, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.3176, + 0.8039, + 0.9961, + 0.9961, + 0.9961, + 0.9961, + 0.9961, + 0.9961, + 0.9961, + 0.9882, + 0.9176, + 0.4706, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.102, + 0.8235, + 0.9961, + 0.9961, + 0.9961, + 0.9961, + 0.9961, + 0.6, + 0.4078, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + } + } + ] +} \ No newline at end of file diff --git a/docs/modelserving/v1beta1/torchserve/proto/v1/inference.proto b/docs/modelserving/v1beta1/torchserve/proto/v1/inference.proto new file mode 100644 index 000000000..338e36ff2 --- /dev/null +++ 
b/docs/modelserving/v1beta1/torchserve/proto/v1/inference.proto @@ -0,0 +1,39 @@ +syntax = "proto3"; + +package org.pytorch.serve.grpc.inference; + +import "google/protobuf/empty.proto"; + +option java_multiple_files = true; + +message PredictionsRequest { + // Name of model. + string model_name = 1; //required + + // Version of model to run prediction on. + string model_version = 2; //optional + + // Input data for model prediction + map input = 3; //required +} + +message PredictionResponse { + // Response content for prediction + bytes prediction = 1; +} + +message TorchServeHealthResponse { + // TorchServe health + string health = 1; +} + +service InferenceAPIsService { + // Check health status of the TorchServe server. + rpc Ping(google.protobuf.Empty) returns (TorchServeHealthResponse) {} + + // Predictions entry point to get inference using default model version. + rpc Predictions(PredictionsRequest) returns (PredictionResponse) {} + + // Streaming response for an inference request. + rpc StreamPredictions(PredictionsRequest) returns (stream PredictionResponse) {} +} diff --git a/docs/modelserving/v1beta1/torchserve/proto/v1/management.proto b/docs/modelserving/v1beta1/torchserve/proto/v1/management.proto new file mode 100644 index 000000000..ad65c33ca --- /dev/null +++ b/docs/modelserving/v1beta1/torchserve/proto/v1/management.proto @@ -0,0 +1,119 @@ +syntax = "proto3"; + +package org.pytorch.serve.grpc.management; + +option java_multiple_files = true; + +message ManagementResponse { + // Response string of different management API calls. + string msg = 1; +} + +message DescribeModelRequest { + // Name of model to describe. + string model_name = 1; //required + // Version of model to describe. + string model_version = 2; //optional + // Customized metadata + bool customized = 3; //optional +} + +message ListModelsRequest { + // Use this parameter to specify the maximum number of items to return. 
When this value is present, TorchServe does not return more than the specified number of items, but it might return fewer. This value is optional. If you include a value, it must be between 1 and 1000, inclusive. If you do not include a value, it defaults to 100. + int32 limit = 1; //optional + + // The token to retrieve the next set of results. TorchServe provides the token when the response from a previous call has more results than the maximum page size. + int32 next_page_token = 2; //optional +} + +message RegisterModelRequest { + // Inference batch size, default: 1. + int32 batch_size = 1; //optional + + // Inference handler entry-point. This value will override handler in MANIFEST.json if present. + string handler = 2; //optional + + // Number of initial workers, default: 0. + int32 initial_workers = 3; //optional + + // Maximum delay for batch aggregation, default: 100. + int32 max_batch_delay = 4; //optional + + // Name of model. This value will override modelName in MANIFEST.json if present. + string model_name = 5; //optional + + // Maximum time, in seconds, the TorchServe waits for a response from the model inference code, default: 120. + int32 response_timeout = 6; //optional + + // Runtime for the model custom service code. This value will override runtime in MANIFEST.json if present. + string runtime = 7; //optional + + // Decides whether creation of worker synchronous or not, default: false. + bool synchronous = 8; //optional + + // Model archive download url, support local file or HTTP(s) protocol. + string url = 9; //required + + // Decides whether S3 SSE KMS enabled or not, default: false. + bool s3_sse_kms = 10; //optional +} + +message ScaleWorkerRequest { + + // Name of model to scale workers. + string model_name = 1; //required + + // Model version. + string model_version = 2; //optional + + // Maximum number of worker processes. + int32 max_worker = 3; //optional + + // Minimum number of worker processes. 
+ int32 min_worker = 4; //optional + + // Number of GPU worker processes to create. + int32 number_gpu = 5; //optional + + // Decides whether the call is synchronous or not, default: false. + bool synchronous = 6; //optional + + // Waiting up to the specified wait time if necessary for a worker to complete all pending requests. Use 0 to terminate backend worker process immediately. Use -1 for wait infinitely. + int32 timeout = 7; //optional +} + +message SetDefaultRequest { + // Name of model whose default version needs to be updated. + string model_name = 1; //required + + // Version of model to be set as default version for the model + string model_version = 2; //required +} + +message UnregisterModelRequest { + // Name of model to unregister. + string model_name = 1; //required + + // Name of model to unregister. + string model_version = 2; //optional +} + +service ManagementAPIsService { + // Provides detailed information about the default version of a model. + rpc DescribeModel(DescribeModelRequest) returns (ManagementResponse) {} + + // List registered models in TorchServe. + rpc ListModels(ListModelsRequest) returns (ManagementResponse) {} + + // Register a new model in TorchServe. + rpc RegisterModel(RegisterModelRequest) returns (ManagementResponse) {} + + // Configure number of workers for a default version of a model.This is a asynchronous call by default. Caller need to call describeModel to check if the model workers has been changed. + rpc ScaleWorker(ScaleWorkerRequest) returns (ManagementResponse) {} + + // Set default version of a model + rpc SetDefault(SetDefaultRequest) returns (ManagementResponse) {} + + // Unregister the default version of a model from TorchServe if it is the only version available.This is a asynchronous call by default. 
Caller can call listModels to confirm model is unregistered + rpc UnregisterModel(UnregisterModelRequest) returns (ManagementResponse) {} +} \ No newline at end of file diff --git a/docs/modelserving/v1beta1/torchserve/proto/v2/grpc_predict_v2.proto b/docs/modelserving/v1beta1/torchserve/proto/v2/grpc_predict_v2.proto new file mode 100644 index 000000000..718724fe6 --- /dev/null +++ b/docs/modelserving/v1beta1/torchserve/proto/v2/grpc_predict_v2.proto @@ -0,0 +1,362 @@ +// Copyright 2022 The KServe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +package inference; + +// Inference Server GRPC endpoints. +service GRPCInferenceService +{ + // The ServerLive API indicates if the inference server is able to receive + // and respond to metadata and inference requests. + rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {} + + // The ServerReady API indicates if the server is ready for inferencing. + rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {} + + // The ModelReady API indicates if a specific model is ready for inferencing. + rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {} + + // The ServerMetadata API provides information about the server. Errors are + // indicated by the google.rpc.Status returned for the request. The OK code + // indicates success and other codes indicate failure. 
+ rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {} + + // The per-model metadata API provides information about a model. Errors are + // indicated by the google.rpc.Status returned for the request. The OK code + // indicates success and other codes indicate failure. + rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {} + + // The ModelInfer API performs inference using the specified model. Errors are + // indicated by the google.rpc.Status returned for the request. The OK code + // indicates success and other codes indicate failure. + rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {} + + // Load or reload a model from a repository. + rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns (RepositoryModelLoadResponse) {} + + // Unload a model. + rpc RepositoryModelUnload(RepositoryModelUnloadRequest) returns (RepositoryModelUnloadResponse) {} +} + +message ServerLiveRequest {} + +message ServerLiveResponse +{ + // True if the inference server is live, false if not live. + bool live = 1; +} + +message ServerReadyRequest {} + +message ServerReadyResponse +{ + // True if the inference server is ready, false if not ready. + bool ready = 1; +} + +message ModelReadyRequest +{ + // The name of the model to check for readiness. + string name = 1; + + // The version of the model to check for readiness. If not given the + // server will choose a version based on the model and internal policy. + string version = 2; +} + +message ModelReadyResponse +{ + // True if the model is ready, false if not ready. + bool ready = 1; +} + +message ServerMetadataRequest {} + +message ServerMetadataResponse +{ + // The server name. + string name = 1; + + // The server version. + string version = 2; + + // The extensions supported by the server. + repeated string extensions = 3; +} + +message ModelMetadataRequest +{ + // The name of the model. + string name = 1; + + // The version of the model to check for readiness. 
If not given the + // server will choose a version based on the model and internal policy. + string version = 2; +} + +message ModelMetadataResponse +{ + // Metadata for a tensor. + message TensorMetadata + { + // The tensor name. + string name = 1; + + // The tensor data type. + string datatype = 2; + + // The tensor shape. A variable-size dimension is represented + // by a -1 value. + repeated int64 shape = 3; + } + + // The model name. + string name = 1; + + // The versions of the model available on the server. + repeated string versions = 2; + + // The model's platform. See Platforms. + string platform = 3; + + // The model's inputs. + repeated TensorMetadata inputs = 4; + + // The model's outputs. + repeated TensorMetadata outputs = 5; +} + +message ModelInferRequest +{ + // An input tensor for an inference request. + message InferInputTensor + { + // The tensor name. + string name = 1; + + // The tensor data type. + string datatype = 2; + + // The tensor shape. + repeated int64 shape = 3; + + // Optional inference input tensor parameters. + map parameters = 4; + + // The tensor contents using a data-type format. This field must + // not be specified if "raw" tensor contents are being used for + // the inference request. + InferTensorContents contents = 5; + } + + // An output tensor requested for an inference request. + message InferRequestedOutputTensor + { + // The tensor name. + string name = 1; + + // Optional requested output tensor parameters. + map parameters = 2; + } + + // The name of the model to use for inferencing. + string model_name = 1; + + // The version of the model to use for inference. If not given the + // server will choose a version based on the model and internal policy. + string model_version = 2; + + // Optional identifier for the request. If specified will be + // returned in the response. + string id = 3; + + // Optional inference parameters. + map parameters = 4; + + // The input tensors for the inference. 
+ repeated InferInputTensor inputs = 5; + + // The requested output tensors for the inference. Optional, if not + // specified all outputs produced by the model will be returned. + repeated InferRequestedOutputTensor outputs = 6; + + // The data contained in an input tensor can be represented in "raw" + // bytes form or in the repeated type that matches the tensor's data + // type. To use the raw representation 'raw_input_contents' must be + // initialized with data for each tensor in the same order as + // 'inputs'. For each tensor, the size of this content must match + // what is expected by the tensor's shape and data type. The raw + // data must be the flattened, one-dimensional, row-major order of + // the tensor elements without any stride or padding between the + // elements. Note that the FP16 and BF16 data types must be represented as + // raw content as there is no specific data type for a 16-bit float type. + // + // If this field is specified then InferInputTensor::contents must + // not be specified for any input tensor. + repeated bytes raw_input_contents = 7; +} + +message ModelInferResponse +{ + // An output tensor returned for an inference request. + message InferOutputTensor + { + // The tensor name. + string name = 1; + + // The tensor data type. + string datatype = 2; + + // The tensor shape. + repeated int64 shape = 3; + + // Optional output tensor parameters. + map parameters = 4; + + // The tensor contents using a data-type format. This field must + // not be specified if "raw" tensor contents are being used for + // the inference response. + InferTensorContents contents = 5; + } + + // The name of the model used for inference. + string model_name = 1; + + // The version of the model used for inference. + string model_version = 2; + + // The id of the inference request if one was specified. + string id = 3; + + // Optional inference response parameters. + map parameters = 4; + + // The output tensors holding inference results. 
+ repeated InferOutputTensor outputs = 5; + + // The data contained in an output tensor can be represented in + // "raw" bytes form or in the repeated type that matches the + // tensor's data type. To use the raw representation 'raw_output_contents' + // must be initialized with data for each tensor in the same order as + // 'outputs'. For each tensor, the size of this content must match + // what is expected by the tensor's shape and data type. The raw + // data must be the flattened, one-dimensional, row-major order of + // the tensor elements without any stride or padding between the + // elements. Note that the FP16 and BF16 data types must be represented as + // raw content as there is no specific data type for a 16-bit float type. + // + // If this field is specified then InferOutputTensor::contents must + // not be specified for any output tensor. + repeated bytes raw_output_contents = 6; +} + +// An inference parameter value. The Parameters message describes a +// “name”/”value” pair, where the “name” is the name of the parameter +// and the “value” is a boolean, integer, or string corresponding to +// the parameter. +message InferParameter +{ + // The parameter value can be a string, an int64, a boolean + // or a message specific to a predefined parameter. + oneof parameter_choice + { + // A boolean parameter value. + bool bool_param = 1; + + // An int64 parameter value. + int64 int64_param = 2; + + // A string parameter value. + string string_param = 3; + } +} + +// The data contained in a tensor represented by the repeated type +// that matches the tensor's data type. Protobuf oneof is not used +// because oneofs cannot contain repeated fields. +message InferTensorContents +{ + // Representation for BOOL data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. 
+ repeated bool bool_contents = 1; + + // Representation for INT8, INT16, and INT32 data types. The size + // must match what is expected by the tensor's shape. The contents + // must be the flattened, one-dimensional, row-major order of the + // tensor elements. + repeated int32 int_contents = 2; + + // Representation for INT64 data types. The size must match what + // is expected by the tensor's shape. The contents must be the + // flattened, one-dimensional, row-major order of the tensor elements. + repeated int64 int64_contents = 3; + + // Representation for UINT8, UINT16, and UINT32 data types. The size + // must match what is expected by the tensor's shape. The contents + // must be the flattened, one-dimensional, row-major order of the + // tensor elements. + repeated uint32 uint_contents = 4; + + // Representation for UINT64 data types. The size must match what + // is expected by the tensor's shape. The contents must be the + // flattened, one-dimensional, row-major order of the tensor elements. + repeated uint64 uint64_contents = 5; + + // Representation for FP32 data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated float fp32_contents = 6; + + // Representation for FP64 data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated double fp64_contents = 7; + + // Representation for BYTES data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated bytes bytes_contents = 8; +} + +message RepositoryModelLoadRequest +{ + // The name of the model to load, or reload. + string model_name = 1; +} + +message RepositoryModelLoadResponse +{ + // The name of the model trying to load or reload. 
+ string model_name = 1; + + // boolean parameter to indicate whether model is loaded or not + bool isLoaded = 2; +} + +message RepositoryModelUnloadRequest +{ + // The name of the model to unload. + string model_name = 1; +} + +message RepositoryModelUnloadResponse +{ + // The name of the model trying to load or reload. + string model_name = 1; + + // boolean parameter to indicate whether model is unloaded or not + bool isUnloaded = 2; +} diff --git a/docs/modelserving/v1beta1/torchserve/torchserve_grpc_client.py b/docs/modelserving/v1beta1/torchserve/torchserve_grpc_client.py new file mode 100644 index 000000000..82b096318 --- /dev/null +++ b/docs/modelserving/v1beta1/torchserve/torchserve_grpc_client.py @@ -0,0 +1,178 @@ +import grpc +import inference_pb2 +import inference_pb2_grpc +import management_pb2 +import management_pb2_grpc +import argparse + + +def get_inference_stub(host, port, hostname): + channel = grpc.insecure_channel( + host + ":" + str(port), + options=( + ( + "grpc.ssl_target_name_override", + hostname, + ), + ), + ) + stub = inference_pb2_grpc.InferenceAPIsServiceStub( + channel + ) + return stub + + +def get_management_stub(host, port, hostname): + channel = grpc.insecure_channel( + host + ":" + str(port), + options=( + ( + "grpc.ssl_target_name_override", + hostname, + ), + ), + ) + stub = management_pb2_grpc.ManagementAPIsServiceStub( + channel + ) + return stub + + +def infer(stub, model_name, model_input): + with open(model_input, "rb") as f: + data = f.read() + + input_data = {"data": data} + print(input_data) + response = stub.Predictions( + inference_pb2.PredictionsRequest( + model_name=model_name, + input=input_data, + ) + ) + + try: + prediction = response.prediction.decode( + "utf-8" + ) + print(prediction) + except grpc.RpcError: + exit(1) + + +def ping(stub): + response = stub.Ping( + inference_pb2.TorchServeHealthResponse() + ) + try: + health = response + print("Ping Response:", health) + except grpc.RpcError: + exit(1) + + +def 
def register(stub, model_name, mar_set_str):
    """Register `model_name` with TorchServe through the gRPC management API.

    Args:
        stub: ManagementAPIsService stub (see get_management_stub).
        model_name: Model name; also used to derive the `<model_name>.mar`
            archive file name.
        mar_set_str: Comma-separated list of locally available .mar archives.
            When the model's archive is not in this set, the public TorchServe
            S3 bucket URL is used instead.

    Exits the process with status 1 if the RegisterModel RPC fails.
    """
    mar_set = set(mar_set_str.split(",")) if mar_set_str else set()
    marfile = f"{model_name}.mar"
    print(f"## Check {marfile} in mar_set :", mar_set)
    if marfile not in mar_set:
        # Fall back to the public TorchServe model archive bucket.
        marfile = f"https://torchserve.s3.amazonaws.com/mar_files/{model_name}.mar"

    print(f"## Register marfile:{marfile}\n")
    params = {
        "url": marfile,
        "initial_workers": 1,
        "synchronous": True,
        "model_name": model_name,
    }
    try:
        stub.RegisterModel(management_pb2.RegisterModelRequest(**params))
        print(f"Model {model_name} registered successfully")
    except grpc.RpcError as e:
        print(f"Failed to register model {model_name}.")
        print(str(e.details()))
        exit(1)


def unregister(stub, model_name):
    """Unregister `model_name` from TorchServe through the gRPC management API.

    Exits the process with status 1 if the UnregisterModel RPC fails.
    """
    try:
        stub.UnregisterModel(
            management_pb2.UnregisterModelRequest(model_name=model_name)
        )
        print(f"Model {model_name} unregistered successfully")
    except grpc.RpcError as e:
        print(f"Failed to unregister model {model_name}.")
        print(str(e.details()))
        exit(1)
+ ) + print(str(e.details())) + exit(1) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--host", + help="Ingress Host Name", + default="localhost", + type=str, + ) + parser.add_argument( + "--port", + help="Ingress Port", + default=8080, + type=int, + ) + parser.add_argument( + "--hostname", + help="Service Host Name", + default="", + type=str, + ) + parser.add_argument( + "--model", + help="Torchserve Model Name", + type=str, + ) + parser.add_argument( + "--api_name", + help="API Name", + default="ping", + type=str, + ) + parser.add_argument( + "--input_path", + help="Prediction data input path", + default="mnist.json", + type=str, + ) + + args = parser.parse_args() + stub = get_inference_stub( + args.host, args.port, args.hostname + ) + if args.api_name == "infer": + infer(stub, args.model, args.input_path) + elif args.api_name == "ping": + ping(stub) + else: + print("Invalid API name") + exit(1) From 54ccc1fb2e6367f87b7b500b5c18b8ff1809be0e Mon Sep 17 00:00:00 2001 From: Andrews Arokiam Date: Wed, 18 Oct 2023 11:34:21 +0530 Subject: [PATCH 2/7] Schema order changed. 
Signed-off-by: Andrews Arokiam --- .../modelserving/v1beta1/torchserve/README.md | 34 ++++++++----------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/docs/modelserving/v1beta1/torchserve/README.md b/docs/modelserving/v1beta1/torchserve/README.md index 732e7966b..c4778caa0 100644 --- a/docs/modelserving/v1beta1/torchserve/README.md +++ b/docs/modelserving/v1beta1/torchserve/README.md @@ -222,7 +222,7 @@ Expected Output $inferenceservice.serving.kserve.io/torchserve-grpc created ``` -=== "Old Schema" +=== "New Schema" ```yaml apiVersion: serving.kserve.io/v1beta1 @@ -231,7 +231,9 @@ $inferenceservice.serving.kserve.io/torchserve-grpc created name: "torchserve-grpc" spec: predictor: - pytorch: + model: + modelFormat: + name: pytorch storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v1 ports: - containerPort: 7070 @@ -239,18 +241,16 @@ $inferenceservice.serving.kserve.io/torchserve-grpc created protocol: TCP ``` -=== "New Schema" +=== "Old Schema" ```yaml - apiVersion: "serving.kserve.io/v1beta1" - kind: "InferenceService" + apiVersion: serving.kserve.io/v1beta1 + kind: InferenceService metadata: name: "torchserve-grpc" spec: predictor: - model: - modelFormat: - name: pytorch + pytorch: storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v1 ports: - containerPort: 7070 @@ -424,7 +424,7 @@ Expected Output $inferenceservice.serving.kserve.io/torchserve-grpc-v2 created ``` -=== "Old Schema" +=== "New Schema" ```yaml apiVersion: serving.kserve.io/v1beta1 @@ -433,7 +433,9 @@ $inferenceservice.serving.kserve.io/torchserve-grpc-v2 created name: "torchserve-grpc-v2" spec: predictor: - pytorch: + model: + modelFormat: + name: pytorch protocolVersion: v2 storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v2 ports: @@ -442,7 +444,7 @@ $inferenceservice.serving.kserve.io/torchserve-grpc-v2 created protocol: TCP ``` -=== "New Schema" +=== "Old Schema" ```yaml apiVersion: serving.kserve.io/v1beta1 @@ 
-451,9 +453,7 @@ $inferenceservice.serving.kserve.io/torchserve-grpc-v2 created name: "torchserve-grpc-v2" spec: predictor: - model: - modelFormat: - name: pytorch + pytorch: protocolVersion: v2 storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v2 ports: @@ -481,15 +481,9 @@ The first step is to [determine the ingress IP and ports](../../../get_started/f INPUT_PATH=./mnist_v2_grpc_tensor.json PROTO_FILE=proto/v2/grpc_predict_v2.proto SERVICE_HOSTNAME=$(kubectl get inferenceservice torchserve-grpc-v2 -o jsonpath='{.status.url}' | cut -d "/" -f 3) -``` - -### Make grpc call - -```bash grpcurl -v -plaintext -proto ${PROTO_FILE} -authority ${SERVICE_HOSTNAME} -d @ ${INGRESS_HOST}:${INGRESS_PORT} inference.GRPCInferenceService.ModelInfer <<< $(cat "$INPUT_PATH") ``` - !!! success "Expected Output" ```bash From b05513ffa83e925baecfbb1c997cb521d75f6c2e Mon Sep 17 00:00:00 2001 From: Andrews Arokiam Date: Fri, 20 Oct 2023 18:12:57 +0530 Subject: [PATCH 3/7] corrected v2 REST input. 
Signed-off-by: Andrews Arokiam --- docs/modelserving/v1beta1/torchserve/mnist_v2_bytes.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/modelserving/v1beta1/torchserve/mnist_v2_bytes.json b/docs/modelserving/v1beta1/torchserve/mnist_v2_bytes.json index 0c07866db..59b8c7174 100644 --- a/docs/modelserving/v1beta1/torchserve/mnist_v2_bytes.json +++ b/docs/modelserving/v1beta1/torchserve/mnist_v2_bytes.json @@ -1,10 +1,10 @@ { "inputs": [ { - "data": "iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAAA10lEQVR4nGNgGFhgy6xVdrCszBaLFN/mr28+/QOCr69DMCSnA8WvHti0acu/fx/10OS0X/975CDDw8DA1PDn/1pBVEmLf3+zocy2X/+8USXt/82Ds+/+m4sqeehfOpw97d9VFDmlO++t4JwQNMm6f6sZcEpee2+DR/I4A05J7tt4JJP+IUsu+ncRp6TxO9RAQJY0XvrvMAuypNNHuCTz8n+PzVEcy3DtqgiY1ptx6t8/ewY0yX9ntoDA63//Xs3hQpMMPPsPAv68qmDAAFKXwHIzMzCl6AoAxXp0QujtP+8AAAAASUVORK5CYII=", + "data": ["iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAAA10lEQVR4nGNgGFhgy6xVdrCszBaLFN/mr28+/QOCr69DMCSnA8WvHti0acu/fx/10OS0X/975CDDw8DA1PDn/1pBVEmLf3+zocy2X/+8USXt/82Ds+/+m4sqeehfOpw97d9VFDmlO++t4JwQNMm6f6sZcEpee2+DR/I4A05J7tt4JJP+IUsu+ncRp6TxO9RAQJY0XvrvMAuypNNHuCTz8n+PzVEcy3DtqgiY1ptx6t8/ewY0yX9ntoDA63//Xs3hQpMMPPsPAv68qmDAAFKXwHIzMzCl6AoAxXp0QujtP+8AAAAASUVORK5CYII="], "datatype": "BYTES", "name": "312a4eb0-0ca7-4803-a101-a6d2c18486fe", - "shape": -1 + "shape": [-1] } ] } \ No newline at end of file From 1b1272368139288e8f9325857f6dc408976f9f39 Mon Sep 17 00:00:00 2001 From: Andrews Arokiam Date: Thu, 2 Nov 2023 11:28:47 +0530 Subject: [PATCH 4/7] Updated grpc-v2 protocolVersion. 
Signed-off-by: Andrews Arokiam --- docs/modelserving/v1beta1/torchserve/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/modelserving/v1beta1/torchserve/README.md b/docs/modelserving/v1beta1/torchserve/README.md index c4778caa0..cd338bb3c 100644 --- a/docs/modelserving/v1beta1/torchserve/README.md +++ b/docs/modelserving/v1beta1/torchserve/README.md @@ -436,7 +436,7 @@ $inferenceservice.serving.kserve.io/torchserve-grpc-v2 created model: modelFormat: name: pytorch - protocolVersion: v2 + protocolVersion: grpc-v2 storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v2 ports: - containerPort: 8081 @@ -454,7 +454,7 @@ $inferenceservice.serving.kserve.io/torchserve-grpc-v2 created spec: predictor: pytorch: - protocolVersion: v2 + protocolVersion: grpc-v2 storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v2 ports: - containerPort: 8081 From b029600b17f237e69646f275b7beef844bd39d1f Mon Sep 17 00:00:00 2001 From: Dan Sun Date: Sun, 17 Dec 2023 23:51:25 -0500 Subject: [PATCH 5/7] Update README.md --- .../modelserving/v1beta1/torchserve/README.md | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/docs/modelserving/v1beta1/torchserve/README.md b/docs/modelserving/v1beta1/torchserve/README.md index cd338bb3c..385cf80f7 100644 --- a/docs/modelserving/v1beta1/torchserve/README.md +++ b/docs/modelserving/v1beta1/torchserve/README.md @@ -64,7 +64,7 @@ The KServe/TorchServe integration supports KServe v1/v2 REST protocol. In the `c ### Create the TorchServe InferenceService KServe by default selects the `TorchServe` runtime when you specify the model format `pytorch` on new model spec. 
-=== "Old Schema" +=== "New Schema" ```yaml apiVersion: "serving.kserve.io/v1beta1" @@ -73,11 +73,13 @@ KServe by default selects the `TorchServe` runtime when you specify the model fo name: "torchserve" spec: predictor: - pytorch: + model: + modelFormat: + name: pytorch storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v1 ``` - -=== "New Schema" + +=== "Old Schema" ```yaml apiVersion: "serving.kserve.io/v1beta1" @@ -86,9 +88,7 @@ KServe by default selects the `TorchServe` runtime when you specify the model fo name: "torchserve" spec: predictor: - model: - modelFormat: - name: pytorch + pytorch: storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v1 ``` @@ -98,7 +98,7 @@ For deploying the model on CPU, apply the following [torchserve.yaml](./torchser kubectl apply -f torchserve.yaml ``` -=== "Old Schema" +=== "New Schema" ```yaml apiVersion: "serving.kserve.io/v1beta1" @@ -107,7 +107,9 @@ kubectl apply -f torchserve.yaml name: "torchserve" spec: predictor: - pytorch: + model: + modelFormat: + name: pytorch storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v1 resources: limits: @@ -115,7 +117,7 @@ kubectl apply -f torchserve.yaml nvidia.com/gpu: "1" ``` -=== "New Schema" +=== "Old Schema" ```yaml apiVersion: "serving.kserve.io/v1beta1" @@ -124,15 +126,13 @@ kubectl apply -f torchserve.yaml name: "torchserve" spec: predictor: - model: - modelFormat: - name: pytorch + pytorch: storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v1 resources: limits: memory: 4Gi nvidia.com/gpu: "1" - ``` + ``` For deploying the model on GPU, apply the [gpu.yaml](./gpu.yaml) to create the GPU `InferenceService`. 
=== "kubectl" @@ -202,12 +202,12 @@ curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1 {"explanations": [[[[0.0005394675730469475, -0.0022280013123036043, -0.003416480100841055, -0.0051329881112415965, -0.009973864160829985, -0.004112560908882716, -0.009223458030656112, -0.0006676354577291628, -0.005249806664413386, -0.0009790519227372953, -0.0026914653993121195, -0.0069470097151383995, -0.00693530415962956, -0.005973878697847718, -0.00425042437288857, 0.0032867281838150977, -0.004297780258633562, -0.005643196661192014, -0.00653025019738562, -0.0047062916121001185, -0.0018656628277792628, -0.0016757477204072532, -0.0010410417081844845, -0.0019093520822156726, -0.004451403461006374, -0.0008552767257773671, -0.0027638888169885267, -0.0], [0.006971297052106784, 0.007316855222185687, 0.012144494329150574, 0.011477799383288441, 0.006846725347670252, 0.01149386176451476, 0.0045351987881190655, 0.007038361889638708, 0.0035855377023272157, 0.003031419502053957, -0.0008611575226775316, -0.0011085224745969223, -0.0050840743637658534, 0.009855491784340777, 0.007220680811043034, 0.011374285598070253, 0.007147725481709019, 0.0037114580912849457, 0.00030763245479291384, 0.0018305492665953394, 0.010106224395114147, 0.012932881164284687, 0.008862892007714321, 0.0070960526615982435, -0.0015931137903787505, 0.0036495747329455906, 0.0002593849391051298, -0.0], [0.006467265785857396, -0.00041793201228071674, 0.004900316089756856, 0.002308395474823997, 0.007859295399592283, 0.003916404948969494, 0.005630750246437249, 0.0043712538044184375, 0.006128530599133763, -0.009446321309831246, -0.014173645867037036, -0.0062988650915794565, -0.011473838941118539, -0.009049151947644047, -0.0007625645864610934, -0.013721416630061238, -0.0005580156670410108, 0.0033404383756480784, -0.006693278798487951, -0.003705084551144756, 0.005100375089529131, 5.5276874714401074e-05, 0.007221745280359063, -0.00573598303916232, -0.006836169033785967, 0.0025401608627538936, 
9.303533912921196e-05, -0.0], [0.005914399808621816, 0.00452643561023696, 0.003968242261515448, 0.010422786058967673, 0.007728358107899074, 0.01147115923288383, 0.005683869479056691, 0.011150670502307374, 0.008742555292485278, 0.0032882897575743754, 0.014841138421861584, 0.011741228362482451, 0.0004296862879259221, -0.0035118140680654854, -0.006152254410078331, -0.004925121936901983, -2.3611205202801947e-06, 0.029347073037039074, 0.02901626308947743, 0.023379353021343398, 0.004027157620197582, -0.01677662249919171, -0.013497255736128979, 0.006957482854214602, 0.0018321766800746145, 0.008277034396684563, 0.002733405455464871, -0.0], [0.0049579739156640065, -0.002168016158233997, 0.0020644317321723642, 0.0020912464240293825, 0.004719691119907336, 0.007879231202446626, 0.010594445898145937, 0.006533067778982801, 0.002290214592708113, -0.0036651114968251986, 0.010753227423379443, 0.006402706020466243, -0.047075193909339695, -0.08108259303568185, -0.07646875196692542, -0.1681834845371156, -0.1610307396135756, -0.12010309927453829, -0.016148831320070896, -0.009541525999486027, 0.04575604594761406, 0.031470966329886635, 0.02452149438024385, 0.016594078577569567, 0.012213591301610382, -0.002230875840404426, 0.0036704051254298374, -0.0], [0.006410107592414739, 0.005578283890924384, 0.001977103461731095, 0.008935476507124939, 0.0011305055729953436, 0.0004946313900665659, -0.0040266029554395935, -0.004270765544167256, -0.010832150944943138, -0.01653511868336456, -0.011121302103373972, -0.42038514526905024, -0.22874576003118394, -0.16752936178907055, -0.17021699697722079, -0.09998584936787697, -0.09041117495322142, -0.10230248444795721, -0.15260897522094888, 0.07770835838531896, -0.0813761125123066, 0.027556910053932963, 0.036305965104261866, 0.03407793793894619, 0.01212761779302579, 0.006695133380685627, 0.005331392748588556, -0.0], [0.008342680065996267, -0.00029249776150416367, 0.002782130291086583, 0.0027793744856745373, 0.0020525102690845407, 0.003679269934110004, 
0.009373846012918791, -0.0031751745946300403, -0.009042846256743316, 0.0074141593032070775, -0.02796812516561052, -0.593171583786029, -0.4830164472795136, -0.353860128479443, -0.256482708704862, 0.11515586314578445, 0.12700563162828346, 0.0022342450630152204, -0.24673707669992118, -0.012878340813781437, 0.16866821780196756, 0.009739033161051434, -0.000827843726513152, -0.0002137320694585577, -0.004179480126338929, 0.008454049232317358, -0.002767934266266998, -0.0], [0.007070382982749552, 0.005342127805750565, -0.000983984198542354, 0.007910101170274493, 0.001266267696096404, 0.0038575136843053844, 0.006941130321773131, -0.015195182020687892, -0.016954974010578504, -0.031186444096787943, -0.031754626467747966, 0.038918845112017694, 0.06248943950328597, 0.07703301092601872, 0.0438493628024275, -0.0482404449771698, -0.08718650815999045, -0.0014764704694506415, -0.07426336448916614, -0.10378029666564882, 0.008572087846793842, -0.00017173413848283343, 0.010058893270893113, 0.0028410498666004377, 0.002008290211806285, 0.011905375389931099, 0.006071375802943992, -0.0], [0.0076080165949142685, -0.0017127333725310495, 0.00153128150106188, 0.0033391793764531563, 0.005373442509691564, 0.007207746020295443, 0.007422946703693544, -0.00699779191449194, 0.002395328253696969, -0.011682618874195954, -0.012737004464649057, -0.05379966383523857, -0.07174960461749053, -0.03027341304050314, 0.0019411862216381327, -0.0205575129473766, -0.04617091711614171, -0.017655308106959804, -0.009297162816368814, -0.03358572117988279, -0.1626068444778013, -0.015874364762085157, -0.0013736074085577258, -0.014763439328689378, 0.00631805792697278, 0.0021769414283267273, 0.0023061635006792498, -0.0], [0.005569931813561535, 0.004363218328087518, 0.00025609463218383973, 0.009577483244680675, 0.007257755916229399, 0.00976284778532342, -0.006388840235419147, -0.009017880790555707, -0.015308709334434867, -0.016743935775597355, -0.04372596546189275, -0.03523469356755156, -0.017257810114846107, 
0.011960489902313411, 0.01529079831828911, -0.020076559119468443, -0.042792547669901516, -0.0029492027218867116, -0.011109560582516062, -0.12985858077848939, -0.2262858575494602, -0.003391725540087574, -0.03063368684328981, -0.01353486587575121, 0.0011140822443932317, 0.006583451102528798, 0.005667533945285076, -0.0], [0.004056272267155598, -0.0006394041203204911, 0.004664893926197093, 0.010593032387298614, 0.014750931538689989, 0.015428721146282149, 0.012167820222401367, 0.017604752451202518, 0.01038886849969188, 0.020544326931163263, -0.0004206566917812794, -0.0037463581359232674, -0.0024656693040735075, 0.0026061897697624353, -0.05186055271869177, -0.09158655048397382, 0.022976389912563913, -0.19851635458461808, -0.11801281807622972, -0.29127727790584423, -0.017138655663803876, -0.04395515676468641, -0.019241432506341576, 0.0011342298743447392, 0.0030625771422964584, -0.0002867924892991192, -0.0017908808807543712, -0.0], [0.0030114260660488892, 0.0020246448273580006, -0.003293361220376816, 0.0036965043883218584, 0.00013185761728146236, -0.004355610866966878, -0.006432601921104354, -0.004148701459814858, 0.005974553907915845, -0.0001399233607281906, 0.010392944122965082, 0.015693249298693028, 0.0459528427528407, -0.013921539948093455, -0.06615556518538708, 0.02921438991320325, -0.16345220625101778, -0.002130491295590408, -0.11449749664916867, -0.030980255589300607, -0.04804122537359171, -0.05144994776295644, 0.005122827412776085, 0.006464862173908011, 0.008624278272940246, 0.0037316228508156427, 0.0036947794337026706, -0.0], [0.0038173843228389405, -0.0017091931226819494, -0.0030871869816778068, 0.002115642501535999, -0.006926441921580917, -0.003023077828426468, -0.014451359520861637, -0.0020793048380231397, -0.010948003939342523, -0.0014460716966395166, -0.01656990336897737, 0.003052317148320358, -0.0026729564809943513, -0.06360067057346147, 0.07780985635080599, -0.1436689936630281, -0.040817177623437874, -0.04373367754296477, -0.18337299150349698, 
0.025295182977407064, -0.03874921104331938, -0.002353901742617205, 0.011772560401335033, 0.012480994515707569, 0.006498422579824301, 0.00632320984076023, 0.003407169765754805, -0.0], [0.00944355257990139, 0.009242583578688485, 0.005069860444386138, 0.012666191449103024, 0.00941789912565746, 0.004720427012836104, 0.007597687789204113, 0.008679266528089945, 0.00889322771021875, -0.0008577904940828809, 0.0022973860384607604, 0.025328230809207493, -0.09908781123080951, -0.07836626399832172, -0.1546141264726177, -0.2582207272050766, -0.2297524599578219, -0.29561835103416967, 0.12048787956671528, -0.06279365699861471, -0.03832012404275233, 0.022910264999199934, 0.005803508497672737, -0.003858461926053348, 0.0039451232171312765, 0.003858476747495933, 0.0013034515558609956, -0.0], [0.009725756015628606, -0.0004001101998876524, 0.006490722835571152, 0.00800808023631959, 0.0065880711806331265, -0.0010264326176194034, -0.0018914305972878344, -0.008822522194658438, -0.016650520788128117, -0.03254382594389507, -0.014795713101569494, -0.05826499837818885, -0.05165369567511702, -0.13384277337594377, -0.22572641373340493, -0.21584739544668635, -0.2366836351939208, 0.14937824076489659, -0.08127414932170171, -0.06720440139736879, -0.0038552732903526744, 0.0107597891707803, -5.67453590118174e-05, 0.0020161340511396244, -0.000783322694907436, -0.0006397207517995289, -0.005291639205010064, -0.0], [0.008627543242777584, 0.007700097300051849, 0.0020430960246806138, 0.012949015733198586, 0.008428709579953574, 0.001358177022953576, 0.00421863939925833, 0.002657580000868709, -0.007339431957237175, 0.02008439775442315, -0.0033717631758033114, -0.05176633249899187, -0.013790328758662772, -0.39102366157050594, -0.167341447585844, -0.04813367828213947, 0.1367781582239039, -0.04672809260566293, -0.03237784669978756, 0.03218068777925178, 0.02415063765016493, -0.017849899351200002, -0.002975675228088795, -0.004819438014786686, 0.005106898651831245, 0.0024278620704227456, 6.784303333368138e-05, 
-0.0], [0.009644258527009343, -0.001331907219439711, -0.0014639718434477777, 0.008481926798958248, 0.010278031715467508, 0.003625808326891529, -0.01121188617599796, -0.0010634587872994379, -0.0002603820881968461, -0.017985648016990465, -0.06446652745470374, 0.07726063173046191, -0.24739929795334742, -0.2701855018480216, -0.08888614776216278, 0.1373325760136816, -0.02316068912438066, -0.042164834956711514, 0.0009266091344106458, 0.03141872420427644, 0.011587728430225652, 0.0004755143243520787, 0.005860642609620605, 0.008979633931394438, 0.005061734169974005, 0.003932710387086098, 0.0015489986106803626, -0.0], [0.010998736164377534, 0.009378969800902604, 0.00030577045264713074, 0.0159329353530375, 0.014849508018911006, -0.0026513365659554225, 0.002923303082126996, 0.01917908707828847, -0.02338288107991566, -0.05706674679291175, 0.009526265752669624, -0.19945255386401284, -0.10725519695909647, -0.3222906835083537, -0.03857038318412844, -0.013279804965996065, -0.046626023244262085, -0.029299060237210447, -0.043269580558906555, -0.03768510002290657, -0.02255977771908117, -0.02632588166863199, -0.014417349488098566, -0.003077271951572957, -0.0004973277708010661, 0.0003475839139671271, -0.0014522783025903258, -0.0], [0.012215315671616316, -0.001693194176229889, 0.011365785434529038, 0.0036964574178487792, -0.010126738168635003, -0.025554378647710443, 0.006538003839811914, -0.03181759044467965, -0.016424751042854728, 0.06177539736110035, -0.43801735323216856, -0.29991040815937386, -0.2516019795363623, 0.037789523540809, -0.010948746374759491, -0.0633901687126727, -0.005976006160777705, 0.006035133605976937, -0.04961632526071937, -0.04142116972831476, -0.07558952727782252, -0.04165176179187153, -0.02021603856619006, -0.0027365663096057032, -0.011145473712733575, 0.0003566937349350848, -0.00546472985268321, -0.0], [0.008009386447317503, 0.006831207743885825, 0.0051306149795546365, 0.016239014770865052, 0.020925441734273218, 0.028344800173195076, -0.004805080609285047, 
-0.01880521614501033, -0.1272329010865855, -0.39835936819190537, -0.09113694760349819, -0.04061591094832608, -0.12677021961235907, 0.015567707226741051, -0.005615051546243333, -0.06454044862001587, 0.0195457674752272, -0.04219686517155871, -0.08060569979524296, 0.027234494361702787, -0.009152881336047056, -0.030865118003992217, -0.005770311060090559, 0.002905833371986098, 5.606663556872091e-05, 0.003209538083839772, -0.0018588810743365345, -0.0], [0.007587008852984699, -0.0021213639853557625, 0.0007709558092903736, 0.013883256128746423, 0.017328713012428214, 0.03645357525636198, -0.04043993335238427, 0.05730125171252314, -0.2563293727512057, -0.11438826083879326, 0.02662382809034687, 0.03525271352483709, 0.04745678120172762, 0.0336360484090392, -0.002916635707204059, -0.17950855098650784, -0.44161773297052964, -0.4512180227831197, -0.4940283106297913, -0.1970108671285798, 0.04344323143078066, -0.012005120444897523, 0.00987576109166055, -0.0018336757466252476, 0.0004913959502151706, -0.0005409724034216215, -0.005039223900868212, -0.0], [0.00637876531169957, 0.005189469227685454, 0.0007676355246000376, 0.018378100865097655, 0.015739815031394887, -0.035524983116512455, 0.03781006978038308, 0.28859052096740495, 0.0726464110153121, -0.026768468497420147, 0.06278766200288134, 0.17897045813699355, -0.13780371920803108, -0.14176458123649577, -0.1733103177731656, -0.3106508869296763, 0.04788355140275794, 0.04235327890285105, -0.031266625292514394, -0.016263819217960652, -0.031388328800811355, -0.01791363975905968, -0.012025067979443894, 0.008335083985905805, -0.0014386677797296231, 0.0055376544652972854, 0.002241522815466253, -0.0], [0.007455256326741617, -0.0009475207572210404, 0.0020288385162615286, 0.015399640135796092, 0.021133843188103074, -0.019846405097622234, -0.003162485751163173, -0.14199005055318842, -0.044200898667146035, -0.013395459413208084, 0.11019680479230103, -0.014057216041764874, -0.12553853334447865, -0.05992513534766256, 0.06467942189539834, 
0.08866056095907732, -0.1451321508061849, -0.07382491447758655, -0.046961739981080476, 0.0008943713493160624, 0.03231044103656507, 0.00036034241706501196, -0.011387669277619417, -0.00014602449257226195, -0.0021863729003374116, 0.0018817840156005856, 0.0037909804578166286, -0.0], [0.006511855618626698, 0.006236866054439829, -0.001440571166157676, 0.012795776609942026, 0.011530545030403624, 0.03495489377257363, 0.04792403136095304, 0.049378583599065225, 0.03296101702085617, -0.0005351385876652296, 0.017744115897640366, 0.0011656622496764954, 0.0232845869823761, -0.0561191397060232, -0.02854070511118366, -0.028614174047247348, -0.007763531086362863, 0.01823079560098924, 0.021961392405283622, -0.009666681805706179, 0.009547046884328725, -0.008729943263791338, 0.006408909680578429, 0.009794327096359952, -0.0025825219195515304, 0.007063559189211571, 0.007867244119267047, -0.0], [0.007936663546039311, -0.00010710180170593153, 0.002716512705673228, 0.0038633557307721487, -0.0014877316616940372, -0.0004788143065635909, 0.012508842248031202, 0.0045381104608414645, -0.010650910516128294, -0.013785341529644855, -0.034287643221318206, -0.022152707546335495, -0.047056481347685974, -0.032166744564720455, -0.021551611335278546, -0.002174962503376043, 0.024344287130424306, 0.015579272560525105, 0.010958169741952194, -0.010607232913436921, -0.005548369726118836, -0.0014630046444242706, 0.013144180105016433, 0.0031349366359021916, 0.0010984887428255974, 0.005426941473328394, 0.006566511860044785, -0.0], [0.0005529184874606495, 0.00026139355020588705, -0.002887623443531047, 0.0013988462990850632, 0.00203365139495493, -0.007276926701775218, -0.004010419939595932, 0.017521952161185662, 0.0006996977433557911, 0.02083134683611201, 0.013690533534289498, -0.005466724359976675, -0.008857712321334327, 0.017408578822635818, 0.0076439343049154425, 0.0017861314923539985, 0.007465865707523924, 0.008034420825988495, 0.003976298558337994, 0.00411970637898539, -0.004572592545819698, 
0.0029563907011979935, -0.0006382227820088148, 0.0015153753877889707, -0.0052626601797995595, 0.0025664706985019416, 0.005161751034260073, -0.0], [0.0009424280561998445, -0.0012942360298110595, 0.0011900868416523343, 0.000984424113178899, 0.0020988269382781564, -0.005870080062890889, -0.004950484744457169, 0.003117643454332697, -0.002509563565777083, 0.005831604884101081, 0.009531085216183116, 0.010030206821909806, 0.005858190171099734, 4.9344529936340524e-05, -0.004027895832421331, 0.0025436439920587606, 0.00531153867563076, 0.00495942692369508, 0.009215148318606382, 0.00010011928317543458, 0.0060051362999805355, -0.0008195376963202741, 0.0041728603512658224, -0.0017597169567888774, -0.0010577007775543158, 0.00046033327178068433, -0.0007674196306044449, -0.0], [-0.0, -0.0, 0.0013386963856532302, 0.00035183178922260837, 0.0030610334903526204, 8.951834979315781e-05, 0.0023676793550483524, -0.0002900551076915047, -0.00207019445286608, -7.61697478482574e-05, 0.0012150086715244216, 0.009831239281792168, 0.003479667642621962, 0.0070584324334114525, 0.004161851261339585, 0.0026146296354490665, -9.194746959222099e-05, 0.0013583866966571571, 0.0016821551239318913, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0]]]]} ``` -## Deploy PyTorch Model with V1 GRPC Protocol +## Deploy PyTorch Model with V1 gRPC Protocol _**Note**_: Since KServe has no gRPC client methods for v1, we are using TorchServe's gRPC v1 client ### Create the InferenceService -For deploying the `InferenceService` with GRPC protocol you need to expose the GRPC port on InferenceService. Here **7070** is torchserve GRPC port. +For deploying the `InferenceService` with gRPC protocol you need to expose the gRPC port on InferenceService. Here **7070** is torchserve gRPC port. Apply the following [mnist_grpc.yaml](./mnist_grpc.yaml) to create the `InferenceService`. 
@@ -258,15 +258,15 @@ $inferenceservice.serving.kserve.io/torchserve-grpc created protocol: TCP ``` -### Run Inference with GRPC protocol for torchserve grpc v1 +### Run Inference with TorchServe gRPC protocol -#### Install GRPC python dependencies +#### Install gRPC python dependencies ```bash pip install -U grpcio protobuf grpcio-tools ``` -#### Download torchserve's inference and management proto +#### Download TorchServe's inference and management proto ```bash @@ -279,12 +279,12 @@ curl -s -L ${INFERENCE_PROTO_FILE_PATH} > ./proto/v1/inference.proto curl -s -L ${MANAGEMENT_PROTO_FILE_PATH} > ./proto/v1/management.proto ``` -#### Generate python GRPC client stub using the proto files +#### Generate python gRPC client stub using the proto files ```bash python -m grpc_tools.protoc --proto_path=proto/v1/ --python_out=. --grpc_python_out=. proto/v1/inference.proto proto/v1/management.proto ``` -#### Run GRPC Inference +#### Run gRPC Inference You can use [image converter](https://github.com/kserve/kserve/tree/master/docs/samples/v1beta1/torchserve/v1/imgconv) to convert the images to base64 byte array, for other models please refer to [input request](https://github.com/pytorch/serve/tree/master/kubernetes/kserve/kf_request_json). @@ -310,13 +310,13 @@ python torchserve_grpc_client.py --api_name infer --model $MODEL_NAME --input_pa } ``` -## Deploy PyTorch model with V2 REST Protocol +## Deploy PyTorch model with Open Inference REST Protocol ### Create the InferenceService KServe by default selects the `TorchServe` runtime when you specify the model format `pytorch` on new model spec and enables the KServe v1 inference protocol. -To enable v2 inference protocol, specify the `protocolVersion` field with the value `v2`. +To enable v2 open inference protocol, specify the `protocolVersion` field with the value `v2`. 
-=== "Old Schema" +=== "New Schema" ```yaml apiVersion: "serving.kserve.io/v1beta1" @@ -325,12 +325,14 @@ To enable v2 inference protocol, specify the `protocolVersion` field with the va name: "torchserve-mnist-v2" spec: predictor: - pytorch: - protocolVersion: v2 + model: + modelFormat: + name: pytorch + protocolVersion: v2 storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v2 ``` - -=== "New Schema" + +=== "Old Schema" ```yaml apiVersion: "serving.kserve.io/v1beta1" @@ -339,10 +341,8 @@ To enable v2 inference protocol, specify the `protocolVersion` field with the va name: "torchserve-mnist-v2" spec: predictor: - model: - modelFormat: - name: pytorch - protocolVersion: v2 + pytorch: + protocolVersion: v2 storageUri: gs://kfserving-examples/models/torchserve/image_classifier/v2 ``` @@ -404,12 +404,12 @@ curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v2 725422148614e-05, 0.0014516114512869852, 0.0002827701966546988, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0014401407633627265, 0.0023812497776698745, 0.002146825301700187, -0.0, -0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0011500529125940918, 0.0002865015572973405, 0.0029798151042282686, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0017750295500283872, 0.0008339859126060243, -0.00377073933577687, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0006093176894575109, -0.00046905787892409935, 0.0034053218511795034, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0007450011768391558, 0.001298767372877851, -0.008499247640112315, -6.145166131400234e-05, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0011809726042792137, -0.001838476328106708, 0.00541110661116898, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.002139234224224006, 0.0003259163407641124, -0.005276118873855287, -0.001950984007438105, -9.545670742026532e-07, 0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 
0.0, 0.0007772404228681039, -0.0001517956264720738, 0.0064814848131711815, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 8.098064985902114e-05, -0.00249042660692983, -0.0020718619200672302, -5.341117902942147e-05, -0.00045564724429915073, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0022750983476959733, 0.0017164060958460778, 0.0003221344707738082, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0015560282678744543, 9.107238495871273e-05, 0.0008772841497928399, 0.0006502978626355868, -0.004128780767525651, 0.0006030386900152659, 0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.001395995791096219, 0.0026791526689584344, 0.0023995008266391488, -0.0004496096312746451, 0.003101832450753724, 0.007494536066960778, 0.0028641187148287965, -0.0030525907182629075, 0.003420222396518567, 0.0014924018363498125, -0.0009357388301326025, 0.0007856228933169799, -0.0018433973914981437, 1.6031856831240914e-05, 0.0, 0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0006999018502034005, 0.004382250870697946, -0.0035419313267119365, -0.0028896748092595375, -0.00048734542493666705, -0.0060873452419295, 0.000388224990424471, 0.002533641537585585, -0.004352836563597573, -0.0006079418766875505, -0.0038101334053377753, -0.000828441340357984, 0.0, -0.0, 0.0, 0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0010901530866342661, -0.013135008038845744, 0.0004734518707654666, 0.002050423283568135, -0.006609451922460863, 0.0023647861820124366, 0.0046789204256194, -0.0018122527412311837, 0.002137538353955849, 0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, 
-0.0, -0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}]} ``` -## Deploy PyTorch Model with V2 GRPC Protocol +## Deploy PyTorch Model with Open Inference gRPC Protocol ### Create the InferenceService -For deploying the `InferenceService` with GRPC protocol you need to expose the GRPC port on InferenceService. Here **8081** is kserve GRPC port. +For deploying the `InferenceService` with `Open Inference gRPC Protocol` you need to expose the gRPC port on InferenceService. Here **8081** is kserve gRPC port. Apply the following [mnist_grpc_v2.yaml](./mnist_grpc_v2.yaml) to create the `InferenceService`. @@ -463,7 +463,7 @@ $inferenceservice.serving.kserve.io/torchserve-grpc-v2 created ``` -### Download kserve's grpc inference proto +### Download Open Inference gGRPC proto file ```bash mkdir -p proto/v2 From 6424a7710e8a54a187d5f45bda851e4fcc721e15 Mon Sep 17 00:00:00 2001 From: Dan Sun Date: Mon, 18 Dec 2023 00:05:14 -0500 Subject: [PATCH 6/7] Update README.md --- docs/modelserving/v1beta1/torchserve/README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/modelserving/v1beta1/torchserve/README.md b/docs/modelserving/v1beta1/torchserve/README.md index 385cf80f7..d0de0dfda 100644 --- a/docs/modelserving/v1beta1/torchserve/README.md +++ b/docs/modelserving/v1beta1/torchserve/README.md @@ -260,14 +260,13 @@ $inferenceservice.serving.kserve.io/torchserve-grpc created ### Run Inference with TorchServe gRPC protocol -#### Install gRPC python dependencies +Install gRPC python dependencies ```bash pip install -U grpcio protobuf grpcio-tools ``` -#### Download TorchServe's inference and management proto - +Download TorchServe's inference and management proto ```bash mkdir -p proto/v1 @@ -279,11 +278,12 @@ curl -s -L 
${INFERENCE_PROTO_FILE_PATH} > ./proto/v1/inference.proto curl -s -L ${MANAGEMENT_PROTO_FILE_PATH} > ./proto/v1/management.proto ``` -#### Generate python gRPC client stub using the proto files +Generate python gRPC client stub using the proto files ```bash python -m grpc_tools.protoc --proto_path=proto/v1/ --python_out=. --grpc_python_out=. proto/v1/inference.proto proto/v1/management.proto ``` + #### Run gRPC Inference You can use [image converter](https://github.com/kserve/kserve/tree/master/docs/samples/v1beta1/torchserve/v1/imgconv) to convert the images to base64 byte array, for other models please refer to [input request](https://github.com/pytorch/serve/tree/master/kubernetes/kserve/kf_request_json). @@ -463,8 +463,11 @@ $inferenceservice.serving.kserve.io/torchserve-grpc-v2 created ``` -### Download Open Inference gGRPC proto file +### Run gRPC Inference + +The first step is to [determine the ingress IP and ports](../../../get_started/first_isvc.md#4-determine-the-ingress-ip-and-ports) and set `INGRESS_HOST` and `INGRESS_PORT`. +Then download Open Inference gRPC proto file: ```bash mkdir -p proto/v2 @@ -473,10 +476,7 @@ PROTO_FILE_PATH=https://raw.githubusercontent.com/kserve/kserve/master/python/ks curl -s -L ${PROTO_FILE_PATH} > ./proto/v2/grpc_predict_v2.proto ``` -### Run Model Inference - -The first step is to [determine the ingress IP and ports](../../../get_started/first_isvc.md#4-determine-the-ingress-ip-and-ports) and set `INGRESS_HOST` and `INGRESS_PORT`. 
- +Run the inference test with grpcurl: ```bash INPUT_PATH=./mnist_v2_grpc_tensor.json PROTO_FILE=proto/v2/grpc_predict_v2.proto From 7c235e20e48821cce3108441ffebd3344232fd9f Mon Sep 17 00:00:00 2001 From: Dan Sun Date: Mon, 18 Dec 2023 00:15:44 -0500 Subject: [PATCH 7/7] Update README.md --- docs/modelserving/v1beta1/torchserve/README.md | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/docs/modelserving/v1beta1/torchserve/README.md b/docs/modelserving/v1beta1/torchserve/README.md index d0de0dfda..3989c4011 100644 --- a/docs/modelserving/v1beta1/torchserve/README.md +++ b/docs/modelserving/v1beta1/torchserve/README.md @@ -260,13 +260,13 @@ $inferenceservice.serving.kserve.io/torchserve-grpc created ### Run Inference with TorchServe gRPC protocol -Install gRPC python dependencies +Install gRPC python dependencies: ```bash pip install -U grpcio protobuf grpcio-tools ``` -Download TorchServe's inference and management proto +Download TorchServe's inference and management proto: ```bash mkdir -p proto/v1 @@ -278,19 +278,16 @@ curl -s -L ${INFERENCE_PROTO_FILE_PATH} > ./proto/v1/inference.proto curl -s -L ${MANAGEMENT_PROTO_FILE_PATH} > ./proto/v1/management.proto ``` -Generate python gRPC client stub using the proto files +Generate python gRPC client stub using the proto files: ```bash python -m grpc_tools.protoc --proto_path=proto/v1/ --python_out=. --grpc_python_out=. proto/v1/inference.proto proto/v1/management.proto ``` -#### Run gRPC Inference You can use [image converter](https://github.com/kserve/kserve/tree/master/docs/samples/v1beta1/torchserve/v1/imgconv) to convert the images to base64 byte array, for other models please refer to [input request](https://github.com/pytorch/serve/tree/master/kubernetes/kserve/kf_request_json). -Use this [`mnist.json`](./mnist.json) for sample prediction input. - -Refer the following [`torchserve_grpc_client.py`](./torchserve_grpc_client.py) python script to make torchserve v1 GRPC call. 
+Run gRPC Inference using [`torchserve_grpc_client.py`](./torchserve_grpc_client.py) with [`mnist.json`](./mnist.json) as an example prediction input. ```bash