From 58e8db3087a5851816a5ed72d5b6a5d80fc5721e Mon Sep 17 00:00:00 2001 From: Naushir Patuck Date: Fri, 27 Sep 2024 13:35:52 +0100 Subject: [PATCH] Add support for the IMX500 Add demo scripts for neural network inferencing on the IMX500 device Add an IMX500 devices helper library including sw postprocessing routines Signed-off-by: Naushir Patuck --- examples/imx500/assets/coco_labels.txt | 90 ++ examples/imx500/assets/colours.txt | 21 + examples/imx500/assets/imagenet_labels.txt | 1001 +++++++++++++++++ examples/imx500/imx500_classification_demo.py | 156 +++ .../imx500/imx500_object_detection_demo.py | 174 +++ .../imx500/imx500_object_detection_demo_mp.py | 194 ++++ ...imx500_pose_estimation_higherhrnet_demo.py | 117 ++ examples/imx500/imx500_segmentation_demo.py | 101 ++ picamera2/devices/__init__.py | 1 + picamera2/devices/imx500/__init__.py | 6 + picamera2/devices/imx500/imx500.py | 709 ++++++++++++ picamera2/devices/imx500/postprocess.py | 414 +++++++ .../imx500/postprocess_efficientdet_lite0.py | 213 ++++ .../devices/imx500/postprocess_highernet.py | 562 +++++++++ .../devices/imx500/postprocess_nanodet.py | 63 ++ .../devices/imx500/postprocess_yolov5.py | 244 ++++ .../devices/imx500/postprocess_yolov8.py | 176 +++ setup.py | 9 +- 18 files changed, 4248 insertions(+), 3 deletions(-) create mode 100644 examples/imx500/assets/coco_labels.txt create mode 100644 examples/imx500/assets/colours.txt create mode 100644 examples/imx500/assets/imagenet_labels.txt create mode 100755 examples/imx500/imx500_classification_demo.py create mode 100755 examples/imx500/imx500_object_detection_demo.py create mode 100755 examples/imx500/imx500_object_detection_demo_mp.py create mode 100755 examples/imx500/imx500_pose_estimation_higherhrnet_demo.py create mode 100755 examples/imx500/imx500_segmentation_demo.py create mode 100644 picamera2/devices/imx500/__init__.py create mode 100644 picamera2/devices/imx500/imx500.py create mode 100644 picamera2/devices/imx500/postprocess.py create mode 100644 picamera2/devices/imx500/postprocess_efficientdet_lite0.py create mode 100644 picamera2/devices/imx500/postprocess_highernet.py create mode 100644 picamera2/devices/imx500/postprocess_nanodet.py create mode 100644 picamera2/devices/imx500/postprocess_yolov5.py create mode 100644 picamera2/devices/imx500/postprocess_yolov8.py diff --git a/examples/imx500/assets/coco_labels.txt b/examples/imx500/assets/coco_labels.txt new file mode 100644 index 00000000..a76dd2a2 --- /dev/null +++ b/examples/imx500/assets/coco_labels.txt @@ -0,0 +1,90 @@ +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +- +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +- +backpack +umbrella +- +- +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +- +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +- +dining table +- +- +toilet +- +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +- +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/examples/imx500/assets/colours.txt b/examples/imx500/assets/colours.txt new file mode 100644 index 00000000..02141d11 --- /dev/null +++ b/examples/imx500/assets/colours.txt @@ -0,0 +1,21 @@ +128 0 0 255 +0 128 0 255 +128 128 0 255 +0 0 128 255 +128 0 128 
255 +0 128 128 255 +128 128 128 255 +64 0 0 255 +192 0 0 255 +64 128 0 255 +192 128 0 255 +64 0 128 255 +192 0 128 255 +64 128 128 255 +192 128 128 255 +0 64 0 255 +128 64 0 255 +0 192 0 255 +128 192 0 255 +0 64 128 255 +0 0 0 255 \ No newline at end of file diff --git a/examples/imx500/assets/imagenet_labels.txt b/examples/imx500/assets/imagenet_labels.txt new file mode 100644 index 00000000..17459f63 --- /dev/null +++ b/examples/imx500/assets/imagenet_labels.txt @@ -0,0 +1,1001 @@ +0:background +1:tench, Tinca tinca +2:goldfish, Carassius auratus +3:great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +4:tiger shark, Galeocerdo cuvieri +5:hammerhead, hammerhead shark +6:electric ray, crampfish, numbfish, torpedo +7:stingray +8:cock +9:hen +10:ostrich, Struthio camelus +11:brambling, Fringilla montifringilla +12:goldfinch, Carduelis carduelis +13:house finch, linnet, Carpodacus mexicanus +14:junco, snowbird +15:indigo bunting, indigo finch, indigo bird, Passerina cyanea +16:robin, American robin, Turdus migratorius +17:bulbul +18:jay +19:magpie +20:chickadee +21:water ouzel, dipper +22:kite +23:bald eagle, American eagle, Haliaeetus leucocephalus +24:vulture +25:great grey owl, great gray owl, Strix nebulosa +26:European fire salamander, Salamandra salamandra +27:common newt, Triturus vulgaris +28:eft +29:spotted salamander, Ambystoma maculatum +30:axolotl, mud puppy, Ambystoma mexicanum +31:bullfrog, Rana catesbeiana +32:tree frog, tree-frog +33:tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +34:loggerhead, loggerhead turtle, Caretta caretta +35:leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +36:mud turtle +37:terrapin +38:box turtle, box tortoise +39:banded gecko +40:common iguana, iguana, Iguana iguana +41:American chameleon, anole, Anolis carolinensis +42:whiptail, whiptail lizard +43:agama +44:frilled lizard, Chlamydosaurus kingi +45:alligator lizard +46:Gila monster, Heloderma suspectum +47:green lizard, Lacerta viridis +48:African chameleon, Chamaeleo chamaeleon +49:Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +50:African crocodile, Nile crocodile, Crocodylus niloticus +51:American alligator, Alligator mississipiensis +52:triceratops +53:thunder snake, worm snake, Carphophis amoenus +54:ringneck snake, ring-necked snake, ring snake +55:hognose snake, puff adder, sand viper +56:green snake, grass snake +57:king snake, kingsnake +58:garter snake, grass snake +59:water snake +60:vine snake +61:night snake, Hypsiglena torquata +62:boa constrictor, Constrictor constrictor +63:rock python, rock snake, Python sebae +64:Indian cobra, Naja naja +65:green mamba +66:sea snake +67:horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +68:diamondback, diamondback rattlesnake, Crotalus adamanteus +69:sidewinder, horned rattlesnake, Crotalus cerastes +70:trilobite +71:harvestman, daddy longlegs, Phalangium opilio +72:scorpion +73:black and gold garden spider, Argiope aurantia +74:barn spider, Araneus cavaticus +75:garden spider, Aranea diademata +76:black widow, Latrodectus mactans +77:tarantula +78:wolf spider, hunting spider +79:tick +80:centipede +81:black grouse +82:ptarmigan +83:ruffed grouse, partridge, Bonasa umbellus +84:prairie chicken, prairie grouse, prairie fowl +85:peacock +86:quail +87:partridge +88:African grey, African gray, Psittacus erithacus +89:macaw +90:sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +91:lorikeet +92:coucal +93:bee eater 
+94:hornbill +95:hummingbird +96:jacamar +97:toucan +98:drake +99:red-breasted merganser, Mergus serrator +100:goose +101:black swan, Cygnus atratus +102:tusker +103:echidna, spiny anteater, anteater +104:platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +105:wallaby, brush kangaroo +106:koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +107:wombat +108:jellyfish +109:sea anemone, anemone +110:brain coral +111:flatworm, platyhelminth +112:nematode, nematode worm, roundworm +113:conch +114:snail +115:slug +116:sea slug, nudibranch +117:chiton, coat-of-mail shell, sea cradle, polyplacophore +118:chambered nautilus, pearly nautilus, nautilus +119:Dungeness crab, Cancer magister +120:rock crab, Cancer irroratus +121:fiddler crab +122:king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +123:American lobster, Northern lobster, Maine lobster, Homarus americanus +124:spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +125:crayfish, crawfish, crawdad, crawdaddy +126:hermit crab +127:isopod +128:white stork, Ciconia ciconia +129:black stork, Ciconia nigra +130:spoonbill +131:flamingo +132:little blue heron, Egretta caerulea +133:American egret, great white heron, Egretta albus +134:bittern +135:crane +136:limpkin, Aramus pictus +137:European gallinule, Porphyrio porphyrio +138:American coot, marsh hen, mud hen, water hen, Fulica americana +139:bustard +140:ruddy turnstone, Arenaria interpres +141:red-backed sandpiper, dunlin, Erolia alpina +142:redshank, Tringa totanus +143:dowitcher +144:oystercatcher, oyster catcher +145:pelican +146:king penguin, Aptenodytes patagonica +147:albatross, mollymawk +148:grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +149:killer whale, killer, orca, grampus, sea wolf, Orcinus orca +150:dugong, Dugong dugon +151:sea lion +152:Chihuahua +153:Japanese spaniel +154:Maltese dog, Maltese terrier, Maltese +155:Pekinese, Pekingese, Peke +156:Shih-Tzu +157:Blenheim spaniel +158:papillon +159:toy terrier +160:Rhodesian ridgeback +161:Afghan hound, Afghan +162:basset, basset hound +163:beagle +164:bloodhound, sleuthhound +165:bluetick +166:black-and-tan coonhound +167:Walker hound, Walker foxhound +168:English foxhound +169:redbone +170:borzoi, Russian wolfhound +171:Irish wolfhound +172:Italian greyhound +173:whippet +174:Ibizan hound, Ibizan Podenco +175:Norwegian elkhound, elkhound +176:otterhound, otter hound +177:Saluki, gazelle hound +178:Scottish deerhound, deerhound +179:Weimaraner +180:Staffordshire bullterrier, Staffordshire bull terrier +181:American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +182:Bedlington terrier +183:Border terrier +184:Kerry blue terrier +185:Irish terrier +186:Norfolk terrier +187:Norwich terrier +188:Yorkshire terrier +189:wire-haired fox terrier +190:Lakeland terrier +191:Sealyham terrier, Sealyham +192:Airedale, Airedale terrier +193:cairn, cairn terrier +194:Australian terrier +195:Dandie Dinmont, Dandie Dinmont terrier +196:Boston bull, Boston terrier +197:miniature schnauzer +198:giant schnauzer +199:standard schnauzer +200:Scotch terrier, Scottish terrier, Scottie +201:Tibetan terrier, chrysanthemum dog +202:silky terrier, Sydney silky +203:soft-coated wheaten terrier +204:West Highland white terrier +205:Lhasa, Lhasa apso +206:flat-coated retriever +207:curly-coated retriever +208:golden retriever +209:Labrador retriever +210:Chesapeake Bay 
retriever +211:German short-haired pointer +212:vizsla, Hungarian pointer +213:English setter +214:Irish setter, red setter +215:Gordon setter +216:Brittany spaniel +217:clumber, clumber spaniel +218:English springer, English springer spaniel +219:Welsh springer spaniel +220:cocker spaniel, English cocker spaniel, cocker +221:Sussex spaniel +222:Irish water spaniel +223:kuvasz +224:schipperke +225:groenendael +226:malinois +227:briard +228:kelpie +229:komondor +230:Old English sheepdog, bobtail +231:Shetland sheepdog, Shetland sheep dog, Shetland +232:collie +233:Border collie +234:Bouvier des Flandres, Bouviers des Flandres +235:Rottweiler +236:German shepherd, German shepherd dog, German police dog, alsatian +237:Doberman, Doberman pinscher +238:miniature pinscher +239:Greater Swiss Mountain dog +240:Bernese mountain dog +241:Appenzeller +242:EntleBucher +243:boxer +244:bull mastiff +245:Tibetan mastiff +246:French bulldog +247:Great Dane +248:Saint Bernard, St Bernard +249:Eskimo dog, husky +250:malamute, malemute, Alaskan malamute +251:Siberian husky +252:dalmatian, coach dog, carriage dog +253:affenpinscher, monkey pinscher, monkey dog +254:basenji +255:pug, pug-dog +256:Leonberg +257:Newfoundland, Newfoundland dog +258:Great Pyrenees +259:Samoyed, Samoyede +260:Pomeranian +261:chow, chow chow +262:keeshond +263:Brabancon griffon +264:Pembroke, Pembroke Welsh corgi +265:Cardigan, Cardigan Welsh corgi +266:toy poodle +267:miniature poodle +268:standard poodle +269:Mexican hairless +270:timber wolf, grey wolf, gray wolf, Canis lupus +271:white wolf, Arctic wolf, Canis lupus tundrarum +272:red wolf, maned wolf, Canis rufus, Canis niger +273:coyote, prairie wolf, brush wolf, Canis latrans +274:dingo, warrigal, warragal, Canis dingo +275:dhole, Cuon alpinus +276:African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +277:hyena, hyaena +278:red fox, Vulpes vulpes +279:kit fox, Vulpes macrotis +280:Arctic fox, white fox, Alopex lagopus +281:grey fox, gray fox, Urocyon cinereoargenteus +282:tabby, tabby cat +283:tiger cat +284:Persian cat +285:Siamese cat, Siamese +286:Egyptian cat +287:cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +288:lynx, catamount +289:leopard, Panthera pardus +290:snow leopard, ounce, Panthera uncia +291:jaguar, panther, Panthera onca, Felis onca +292:lion, king of beasts, Panthera leo +293:tiger, Panthera tigris +294:cheetah, chetah, Acinonyx jubatus +295:brown bear, bruin, Ursus arctos +296:American black bear, black bear, Ursus americanus, Euarctos americanus +297:ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +298:sloth bear, Melursus ursinus, Ursus ursinus +299:mongoose +300:meerkat, mierkat +301:tiger beetle +302:ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +303:ground beetle, carabid beetle +304:long-horned beetle, longicorn, longicorn beetle +305:leaf beetle, chrysomelid +306:dung beetle +307:rhinoceros beetle +308:weevil +309:fly +310:bee +311:ant, emmet, pismire +312:grasshopper, hopper +313:cricket +314:walking stick, walkingstick, stick insect +315:cockroach, roach +316:mantis, mantid +317:cicada, cicala +318:leafhopper +319:lacewing, lacewing fly +320:dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +321:damselfly +322:admiral +323:ringlet, ringlet butterfly +324:monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +325:cabbage butterfly +326:sulphur butterfly, sulfur butterfly +327:lycaenid, lycaenid butterfly 
+328:starfish, sea star +329:sea urchin +330:sea cucumber, holothurian +331:wood rabbit, cottontail, cottontail rabbit +332:hare +333:Angora, Angora rabbit +334:hamster +335:porcupine, hedgehog +336:fox squirrel, eastern fox squirrel, Sciurus niger +337:marmot +338:beaver +339:guinea pig, Cavia cobaya +340:sorrel +341:zebra +342:hog, pig, grunter, squealer, Sus scrofa +343:wild boar, boar, Sus scrofa +344:warthog +345:hippopotamus, hippo, river horse, Hippopotamus amphibius +346:ox +347:water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +348:bison +349:ram, tup +350:bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +351:ibex, Capra ibex +352:hartebeest +353:impala, Aepyceros melampus +354:gazelle +355:Arabian camel, dromedary, Camelus dromedarius +356:llama +357:weasel +358:mink +359:polecat, fitch, foulmart, foumart, Mustela putorius +360:black-footed ferret, ferret, Mustela nigripes +361:otter +362:skunk, polecat, wood pussy +363:badger +364:armadillo +365:three-toed sloth, ai, Bradypus tridactylus +366:orangutan, orang, orangutang, Pongo pygmaeus +367:gorilla, Gorilla gorilla +368:chimpanzee, chimp, Pan troglodytes +369:gibbon, Hylobates lar +370:siamang, Hylobates syndactylus, Symphalangus syndactylus +371:guenon, guenon monkey +372:patas, hussar monkey, Erythrocebus patas +373:baboon +374:macaque +375:langur +376:colobus, colobus monkey +377:proboscis monkey, Nasalis larvatus +378:marmoset +379:capuchin, ringtail, Cebus capucinus +380:howler monkey, howler +381:titi, titi monkey +382:spider monkey, Ateles geoffroyi +383:squirrel monkey, Saimiri sciureus +384:Madagascar cat, ring-tailed lemur, Lemur catta +385:indri, indris, Indri indri, Indri brevicaudatus +386:Indian elephant, Elephas maximus +387:African elephant, Loxodonta africana +388:lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +389:giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +390:barracouta, snoek +391:eel +392:coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +393:rock beauty, Holocanthus tricolor +394:anemone fish +395:sturgeon +396:gar, garfish, garpike, billfish, Lepisosteus osseus +397:lionfish +398:puffer, pufferfish, blowfish, globefish +399:abacus +400:abaya +401:academic gown, academic robe, judge's robe +402:accordion, piano accordion, squeeze box +403:acoustic guitar +404:aircraft carrier, carrier, flattop, attack aircraft carrier +405:airliner +406:airship, dirigible +407:altar +408:ambulance +409:amphibian, amphibious vehicle +410:analog clock +411:apiary, bee house +412:apron +413:ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +414:assault rifle, assault gun +415:backpack, back pack, knapsack, packsack, rucksack, haversack +416:bakery, bakeshop, bakehouse +417:balance beam, beam +418:balloon +419:ballpoint, ballpoint pen, ballpen, Biro +420:Band Aid +421:banjo +422:bannister, banister, balustrade, balusters, handrail +423:barbell +424:barber chair +425:barbershop +426:barn +427:barometer +428:barrel, cask +429:barrow, garden cart, lawn cart, wheelbarrow +430:baseball +431:basketball +432:bassinet +433:bassoon +434:bathing cap, swimming cap +435:bath towel +436:bathtub, bathing tub, bath, tub +437:beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +438:beacon, lighthouse, beacon light, pharos +439:beaker +440:bearskin, busby, shako +441:beer bottle +442:beer glass +443:bell cote, bell cot +444:bib 
+445:bicycle-built-for-two, tandem bicycle, tandem +446:bikini, two-piece +447:binder, ring-binder +448:binoculars, field glasses, opera glasses +449:birdhouse +450:boathouse +451:bobsled, bobsleigh, bob +452:bolo tie, bolo, bola tie, bola +453:bonnet, poke bonnet +454:bookcase +455:bookshop, bookstore, bookstall +456:bottlecap +457:bow +458:bow tie, bow-tie, bowtie +459:brass, memorial tablet, plaque +460:brassiere, bra, bandeau +461:breakwater, groin, groyne, mole, bulwark, seawall, jetty +462:breastplate, aegis, egis +463:broom +464:bucket, pail +465:buckle +466:bulletproof vest +467:bullet train, bullet +468:butcher shop, meat market +469:cab, hack, taxi, taxicab +470:caldron, cauldron +471:candle, taper, wax light +472:cannon +473:canoe +474:can opener, tin opener +475:cardigan +476:car mirror +477:carousel, carrousel, merry-go-round, roundabout, whirligig +478:carpenter's kit, tool kit +479:carton +480:car wheel +481:cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +482:cassette +483:cassette player +484:castle +485:catamaran +486:CD player +487:cello, violoncello +488:cellular telephone, cellular phone, cellphone, cell, mobile phone +489:chain +490:chainlink fence +491:chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +492:chain saw, chainsaw +493:chest +494:chiffonier, commode +495:chime, bell, gong +496:china cabinet, china closet +497:Christmas stocking +498:church, church building +499:cinema, movie theater, movie theatre, movie house, picture palace +500:cleaver, meat cleaver, chopper +501:cliff dwelling +502:cloak +503:clog, geta, patten, sabot +504:cocktail shaker +505:coffee mug +506:coffeepot +507:coil, spiral, volute, whorl, helix +508:combination lock +509:computer keyboard, keypad +510:confectionery, confectionary, candy store +511:container ship, containership, container vessel +512:convertible +513:corkscrew, bottle screw +514:cornet, horn, trumpet, trump +515:cowboy boot +516:cowboy hat, ten-gallon hat +517:cradle +518:crane +519:crash helmet +520:crate +521:crib, cot +522:Crock Pot +523:croquet ball +524:crutch +525:cuirass +526:dam, dike, dyke +527:desk +528:desktop computer +529:dial telephone, dial phone +530:diaper, nappy, napkin +531:digital clock +532:digital watch +533:dining table, board +534:dishrag, dishcloth +535:dishwasher, dish washer, dishwashing machine +536:disk brake, disc brake +537:dock, dockage, docking facility +538:dogsled, dog sled, dog sleigh +539:dome +540:doormat, welcome mat +541:drilling platform, offshore rig +542:drum, membranophone, tympan +543:drumstick +544:dumbbell +545:Dutch oven +546:electric fan, blower +547:electric guitar +548:electric locomotive +549:entertainment center +550:envelope +551:espresso maker +552:face powder +553:feather boa, boa +554:file, file cabinet, filing cabinet +555:fireboat +556:fire engine, fire truck +557:fire screen, fireguard +558:flagpole, flagstaff +559:flute, transverse flute +560:folding chair +561:football helmet +562:forklift +563:fountain +564:fountain pen +565:four-poster +566:freight car +567:French horn, horn +568:frying pan, frypan, skillet +569:fur coat +570:garbage truck, dustcart +571:gasmask, respirator, gas helmet +572:gas pump, gasoline pump, petrol pump, island dispenser +573:goblet +574:go-kart +575:golf ball +576:golfcart, golf cart +577:gondola +578:gong, tam-tam +579:gown +580:grand piano, grand +581:greenhouse, nursery, glasshouse +582:grille, radiator grille +583:grocery store, 
grocery, food market, market +584:guillotine +585:hair slide +586:hair spray +587:half track +588:hammer +589:hamper +590:hand blower, blow dryer, blow drier, hair dryer, hair drier +591:hand-held computer, hand-held microcomputer +592:handkerchief, hankie, hanky, hankey +593:hard disc, hard disk, fixed disk +594:harmonica, mouth organ, harp, mouth harp +595:harp +596:harvester, reaper +597:hatchet +598:holster +599:home theater, home theatre +600:honeycomb +601:hook, claw +602:hoopskirt, crinoline +603:horizontal bar, high bar +604:horse cart, horse-cart +605:hourglass +606:iPod +607:iron, smoothing iron +608:jack-o'-lantern +609:jean, blue jean, denim +610:jeep, landrover +611:jersey, T-shirt, tee shirt +612:jigsaw puzzle +613:jinrikisha, ricksha, rickshaw +614:joystick +615:kimono +616:knee pad +617:knot +618:lab coat, laboratory coat +619:ladle +620:lampshade, lamp shade +621:laptop, laptop computer +622:lawn mower, mower +623:lens cap, lens cover +624:letter opener, paper knife, paperknife +625:library +626:lifeboat +627:lighter, light, igniter, ignitor +628:limousine, limo +629:liner, ocean liner +630:lipstick, lip rouge +631:Loafer +632:lotion +633:loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +634:loupe, jeweler's loupe +635:lumbermill, sawmill +636:magnetic compass +637:mailbag, postbag +638:mailbox, letter box +639:maillot +640:maillot, tank suit +641:manhole cover +642:maraca +643:marimba, xylophone +644:mask +645:matchstick +646:maypole +647:maze, labyrinth +648:measuring cup +649:medicine chest, medicine cabinet +650:megalith, megalithic structure +651:microphone, mike +652:microwave, microwave oven +653:military uniform +654:milk can +655:minibus +656:miniskirt, mini +657:minivan +658:missile +659:mitten +660:mixing bowl +661:mobile home, manufactured home +662:Model T +663:modem +664:monastery +665:monitor +666:moped +667:mortar +668:mortarboard +669:mosque +670:mosquito net +671:motor scooter, scooter +672:mountain bike, all-terrain bike, off-roader +673:mountain tent +674:mouse, computer mouse +675:mousetrap +676:moving van +677:muzzle +678:nail +679:neck brace +680:necklace +681:nipple +682:notebook, notebook computer +683:obelisk +684:oboe, hautboy, hautbois +685:ocarina, sweet potato +686:odometer, hodometer, mileometer, milometer +687:oil filter +688:organ, pipe organ +689:oscilloscope, scope, cathode-ray oscilloscope, CRO +690:overskirt +691:oxcart +692:oxygen mask +693:packet +694:paddle, boat paddle +695:paddlewheel, paddle wheel +696:padlock +697:paintbrush +698:pajama, pyjama, pj's, jammies +699:palace +700:panpipe, pandean pipe, syrinx +701:paper towel +702:parachute, chute +703:parallel bars, bars +704:park bench +705:parking meter +706:passenger car, coach, carriage +707:patio, terrace +708:pay-phone, pay-station +709:pedestal, plinth, footstall +710:pencil box, pencil case +711:pencil sharpener +712:perfume, essence +713:Petri dish +714:photocopier +715:pick, plectrum, plectron +716:pickelhaube +717:picket fence, paling +718:pickup, pickup truck +719:pier +720:piggy bank, penny bank +721:pill bottle +722:pillow +723:ping-pong ball +724:pinwheel +725:pirate, pirate ship +726:pitcher, ewer +727:plane, carpenter's plane, woodworking plane +728:planetarium +729:plastic bag +730:plate rack +731:plow, plough +732:plunger, plumber's helper +733:Polaroid camera, Polaroid Land camera +734:pole +735:police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +736:poncho +737:pool table, billiard table, snooker table +738:pop bottle, 
soda bottle +739:pot, flowerpot +740:potter's wheel +741:power drill +742:prayer rug, prayer mat +743:printer +744:prison, prison house +745:projectile, missile +746:projector +747:puck, hockey puck +748:punching bag, punch bag, punching ball, punchball +749:purse +750:quill, quill pen +751:quilt, comforter, comfort, puff +752:racer, race car, racing car +753:racket, racquet +754:radiator +755:radio, wireless +756:radio telescope, radio reflector +757:rain barrel +758:recreational vehicle, RV, R.V. +759:reel +760:reflex camera +761:refrigerator, icebox +762:remote control, remote +763:restaurant, eating house, eating place, eatery +764:revolver, six-gun, six-shooter +765:rifle +766:rocking chair, rocker +767:rotisserie +768:rubber eraser, rubber, pencil eraser +769:rugby ball +770:rule, ruler +771:running shoe +772:safe +773:safety pin +774:saltshaker, salt shaker +775:sandal +776:sarong +777:sax, saxophone +778:scabbard +779:scale, weighing machine +780:school bus +781:schooner +782:scoreboard +783:screen, CRT screen +784:screw +785:screwdriver +786:seat belt, seatbelt +787:sewing machine +788:shield, buckler +789:shoe shop, shoe-shop, shoe store +790:shoji +791:shopping basket +792:shopping cart +793:shovel +794:shower cap +795:shower curtain +796:ski +797:ski mask +798:sleeping bag +799:slide rule, slipstick +800:sliding door +801:slot, one-armed bandit +802:snorkel +803:snowmobile +804:snowplow, snowplough +805:soap dispenser +806:soccer ball +807:sock +808:solar dish, solar collector, solar furnace +809:sombrero +810:soup bowl +811:space bar +812:space heater +813:space shuttle +814:spatula +815:speedboat +816:spider web, spider's web +817:spindle +818:sports car, sport car +819:spotlight, spot +820:stage +821:steam locomotive +822:steel arch bridge +823:steel drum +824:stethoscope +825:stole +826:stone wall +827:stopwatch, stop watch +828:stove +829:strainer +830:streetcar, tram, tramcar, trolley, trolley car +831:stretcher +832:studio couch, day bed +833:stupa, tope +834:submarine, pigboat, sub, U-boat +835:suit, suit of clothes +836:sundial +837:sunglass +838:sunglasses, dark glasses, shades +839:sunscreen, sunblock, sun blocker +840:suspension bridge +841:swab, swob, mop +842:sweatshirt +843:swimming trunks, bathing trunks +844:swing +845:switch, electric switch, electrical switch +846:syringe +847:table lamp +848:tank, army tank, armored combat vehicle, armoured combat vehicle +849:tape player +850:teapot +851:teddy, teddy bear +852:television, television system +853:tennis ball +854:thatch, thatched roof +855:theater curtain, theatre curtain +856:thimble +857:thresher, thrasher, threshing machine +858:throne +859:tile roof +860:toaster +861:tobacco shop, tobacconist shop, tobacconist +862:toilet seat +863:torch +864:totem pole +865:tow truck, tow car, wrecker +866:toyshop +867:tractor +868:trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +869:tray +870:trench coat +871:tricycle, trike, velocipede +872:trimaran +873:tripod +874:triumphal arch +875:trolleybus, trolley coach, trackless trolley +876:trombone +877:tub, vat +878:turnstile +879:typewriter keyboard +880:umbrella +881:unicycle, monocycle +882:upright, upright piano +883:vacuum, vacuum cleaner +884:vase +885:vault +886:velvet +887:vending machine +888:vestment +889:viaduct +890:violin, fiddle +891:volleyball +892:waffle iron +893:wall clock +894:wallet, billfold, notecase, pocketbook +895:wardrobe, closet, press +896:warplane, military plane +897:washbasin, handbasin, washbowl, lavabo, wash-hand 
basin +898:washer, automatic washer, washing machine +899:water bottle +900:water jug +901:water tower +902:whiskey jug +903:whistle +904:wig +905:window screen +906:window shade +907:Windsor tie +908:wine bottle +909:wing +910:wok +911:wooden spoon +912:wool, woolen, woollen +913:worm fence, snake fence, snake-rail fence, Virginia fence +914:wreck +915:yawl +916:yurt +917:web site, website, internet site, site +918:comic book +919:crossword puzzle, crossword +920:street sign +921:traffic light, traffic signal, stoplight +922:book jacket, dust cover, dust jacket, dust wrapper +923:menu +924:plate +925:guacamole +926:consomme +927:hot pot, hotpot +928:trifle +929:ice cream, icecream +930:ice lolly, lolly, lollipop, popsicle +931:French loaf +932:bagel, beigel +933:pretzel +934:cheeseburger +935:hotdog, hot dog, red hot +936:mashed potato +937:head cabbage +938:broccoli +939:cauliflower +940:zucchini, courgette +941:spaghetti squash +942:acorn squash +943:butternut squash +944:cucumber, cuke +945:artichoke, globe artichoke +946:bell pepper +947:cardoon +948:mushroom +949:Granny Smith +950:strawberry +951:orange +952:lemon +953:fig +954:pineapple, ananas +955:banana +956:jackfruit, jak, jack +957:custard apple +958:pomegranate +959:hay +960:carbonara +961:chocolate sauce, chocolate syrup +962:dough +963:meat loaf, meatloaf +964:pizza, pizza pie +965:potpie +966:burrito +967:red wine +968:espresso +969:cup +970:eggnog +971:alp +972:bubble +973:cliff, drop, drop-off +974:coral reef +975:geyser +976:lakeside, lakeshore +977:promontory, headland, head, foreland +978:sandbar, sand bar +979:seashore, coast, seacoast, sea-coast +980:valley, vale +981:volcano +982:ballplayer, baseball player +983:groom, bridegroom +984:scuba diver +985:rapeseed +986:daisy +987:yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +988:corn +989:acorn +990:hip, rose hip, rosehip +991:buckeye, horse chestnut, conker +992:coral fungus +993:agaric +994:gyromitra +995:stinkhorn, carrion fungus +996:earthstar +997:hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +998:bolete +999:ear, spike, capitulum +1000:toilet tissue, toilet paper, bathroom tissue \ No newline at end of file diff --git a/examples/imx500/imx500_classification_demo.py b/examples/imx500/imx500_classification_demo.py new file mode 100755 index 00000000..1742471a --- /dev/null +++ b/examples/imx500/imx500_classification_demo.py @@ -0,0 +1,156 @@ +import argparse +import sys +import time +from typing import List + +import cv2 +import numpy as np + +from picamera2 import CompletedRequest, MappedArray, Picamera2 +from picamera2.devices import IMX500 +from picamera2.devices.imx500 import NetworkIntrinsics +from picamera2.devices.imx500.postprocess import softmax + +last_detections = [] +LABELS = None + + +class Classification: + def __init__(self, idx: int, score: float): + """Create a Classification object, recording the idx and score.""" + self.idx = idx + self.score = score + + +def get_label(request: CompletedRequest, idx: int) -> str: + """Retrieve the label corresponding to the classification index.""" + global LABELS + if LABELS is None: + LABELS = intrinsics.labels + assert len(LABELS) in [1000, 1001], "Labels file should contain 1000 or 1001 labels." 
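+        # The output tensor length reveals whether this network was compiled with a
+        # background class (1001 entries) or without one (1000 entries).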
+ output_tensor_size = imx500.get_output_shapes(request.get_metadata())[0][0] + if output_tensor_size == 1000: + LABELS = LABELS[1:] # Ignore the background label if present + return LABELS[idx] + + +def parse_and_draw_classification_results(request: CompletedRequest): + """Analyse and draw the classification results in the output tensor.""" + results = parse_classification_results(request) + draw_classification_results(request, results) + + +def parse_classification_results(request: CompletedRequest) -> List[Classification]: + """Parse the output tensor into the classification results above the threshold.""" + global last_detections + np_outputs = imx500.get_outputs(request.get_metadata()) + if np_outputs is None: + return last_detections + np_output = np_outputs[0] + if intrinsics.softmax: + np_output = softmax(np_output) + top_indices = np.argpartition(-np_output, 3)[:3] # Get top 3 indices with the highest scores + top_indices = top_indices[np.argsort(-np_output[top_indices])] # Sort the top 3 indices by their scores + last_detections = [Classification(index, np_output[index]) for index in top_indices] + return last_detections + + +def draw_classification_results(request: CompletedRequest, results: List[Classification], stream: str = "main"): + """Draw the classification results for this request onto the ISP output.""" + with MappedArray(request, stream) as m: + if intrinsics.preserve_aspect_ratio: + # Drawing ROI box + b_x, b_y, b_w, b_h = imx500.get_roi_scaled(request) + color = (255, 0, 0) # red + cv2.putText(m.array, "ROI", (b_x + 5, b_y + 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1) + cv2.rectangle(m.array, (b_x, b_y), (b_x + b_w, b_y + b_h), (255, 0, 0, 0)) + text_left, text_top = b_x, b_y + 20 + else: + text_left, text_top = 0, 0 + # Drawing labels (in the ROI box if it exists) + for index, result in enumerate(results): + label = get_label(request, idx=result.idx) + text = f"{label}: {result.score:.3f}" + + # Calculate text size and position + (text_width, text_height), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + text_x = text_left + 5 + text_y = text_top + 15 + index * 20 + + # Create a copy of the array to draw the background with opacity + overlay = m.array.copy() + + # Draw the background rectangle on the overlay + cv2.rectangle(overlay, + (text_x, text_y - text_height), + (text_x + text_width, text_y + baseline), + (255, 255, 255), # Background color (white) + cv2.FILLED) + + alpha = 0.3 + cv2.addWeighted(overlay, alpha, m.array, 1 - alpha, 0, m.array) + + # Draw text on top of the background + cv2.putText(m.array, text, (text_x, text_y), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1) + + +def get_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, help="Path of the model", + default="/usr/share/imx500-models/imx500_network_mobilenet_v2.rpk") + parser.add_argument("--fps", type=int, help="Frames per second") + parser.add_argument("-s", "--softmax", action=argparse.BooleanOptionalAction, help="Add post-process softmax") + parser.add_argument("-r", "--preserve-aspect-ratio", action=argparse.BooleanOptionalAction, + help="preprocess the image with preserve aspect ratio") + parser.add_argument("--labels", type=str, + help="Path to the labels file") + parser.add_argument("--print-intrinsics", action="store_true", + help="Print JSON network_intrinsics then exit") + return parser.parse_args() + + +if __name__ == "__main__": + args = get_args() + + # This must be called before 
instantiation of Picamera2 + imx500 = IMX500(args.model) + intrinsics = imx500.network_intrinsics + if not intrinsics: + intrinsics = NetworkIntrinsics() + intrinsics.task = "classification" + elif intrinsics.task != "classification": + print("Network is not a classification task", file=sys.stderr) + exit() + + # Override intrinsics from args + for key, value in vars(args).items(): + if key == 'labels' and value is not None: + with open(value, 'r') as f: + intrinsics.labels = f.read().splitlines() + elif hasattr(intrinsics, key) and value is not None: + setattr(intrinsics, key, value) + + # Defaults + if intrinsics.labels is None: + with open("assets/imagenet_labels.txt", "r") as f: + intrinsics.labels = f.read().splitlines() + intrinsics.update_with_defaults() + + if args.print_intrinsics: + print(intrinsics) + exit() + + picam2 = Picamera2(imx500.camera_num) + config = picam2.create_preview_configuration(controls={"FrameRate": intrinsics.inference_rate}, buffer_count=12) + + imx500.show_network_fw_progress_bar() + picam2.start(config, show_preview=True) + if intrinsics.preserve_aspect_ratio: + imx500.set_auto_aspect_ratio() + # Register the callback to parse and draw classification results + picam2.pre_callback = parse_and_draw_classification_results + + while True: + time.sleep(0.5) diff --git a/examples/imx500/imx500_object_detection_demo.py b/examples/imx500/imx500_object_detection_demo.py new file mode 100755 index 00000000..ab792975 --- /dev/null +++ b/examples/imx500/imx500_object_detection_demo.py @@ -0,0 +1,174 @@ +import argparse +import sys +from functools import lru_cache + +import cv2 +import numpy as np + +from picamera2 import MappedArray, Picamera2 +from picamera2.devices import IMX500 +from picamera2.devices.imx500 import (NetworkIntrinsics, + postprocess_nanodet_detection) + +last_detections = [] + + +class Detection: + def __init__(self, coords, category, conf, metadata): + """Create a Detection object, recording the bounding box, category and confidence.""" + self.category = category + self.conf = conf + self.box = imx500.convert_inference_coords(coords, metadata, picam2) + + +def parse_detections(metadata: dict): + """Parse the output tensor into a number of detected objects, scaled to the ISP out.""" + global last_detections + bbox_normalization = intrinsics.bbox_normalization + threshold = args.threshold + iou = args.iou + max_detections = args.max_detections + + np_outputs = imx500.get_outputs(metadata, add_batch=True) + input_w, input_h = imx500.get_input_size() + if np_outputs is None: + return last_detections + if intrinsics.postprocess == "nanodet": + boxes, scores, classes = \ + postprocess_nanodet_detection(outputs=np_outputs[0], conf=threshold, iou_thres=iou, + max_out_dets=max_detections)[0] + from picamera2.devices.imx500.postprocess import scale_boxes + boxes = scale_boxes(boxes, 1, 1, input_h, input_w, False, False) + else: + boxes, scores, classes = np_outputs[0][0], np_outputs[1][0], np_outputs[2][0] + if bbox_normalization: + boxes = boxes / input_h + + boxes = np.array_split(boxes, 4, axis=1) + boxes = zip(*boxes) + + last_detections = [ + Detection(box, category, score, metadata) + for box, score, category in zip(boxes, scores, classes) + if score > threshold + ] + return last_detections + + +@lru_cache +def get_labels(): + labels = intrinsics.labels + + if intrinsics.ignore_dash_labels: + labels = [label for label in labels if label and label != "-"] + return labels + + +def draw_detections(request, stream="main"): + """Draw the detections for this 
request onto the ISP output.""" + detections = last_results + if detections is None: + return + labels = get_labels() + with MappedArray(request, stream) as m: + for detection in detections: + x, y, w, h = detection.box + label = f"{labels[int(detection.category)]} ({detection.conf:.2f})" + + # Calculate text size and position + (text_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + text_x = x + 5 + text_y = y + 15 + + # Create a copy of the array to draw the background with opacity + overlay = m.array.copy() + + # Draw the background rectangle on the overlay + cv2.rectangle(overlay, + (text_x, text_y - text_height), + (text_x + text_width, text_y + baseline), + (255, 255, 255), # Background color (white) + cv2.FILLED) + + alpha = 0.30 + cv2.addWeighted(overlay, alpha, m.array, 1 - alpha, 0, m.array) + + # Draw text on top of the background + cv2.putText(m.array, label, (text_x, text_y), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1) + + # Draw detection box + cv2.rectangle(m.array, (x, y), (x + w, y + h), (0, 255, 0, 0), thickness=2) + + if intrinsics.preserve_aspect_ratio: + b_x, b_y, b_w, b_h = imx500.get_roi_scaled(request) + color = (255, 0, 0) # red + cv2.putText(m.array, "ROI", (b_x + 5, b_y + 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1) + cv2.rectangle(m.array, (b_x, b_y), (b_x + b_w, b_y + b_h), (255, 0, 0, 0)) + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, help="Path of the model", + default="/usr/share/imx500-models/imx500_network_ssd_mobilenetv2_fpnlite_320x320_pp.rpk") + parser.add_argument("--fps", type=int, help="Frames per second") + parser.add_argument("--bbox-normalization", action=argparse.BooleanOptionalAction, help="Normalize bbox") + parser.add_argument("--threshold", type=float, default=0.55, help="Detection threshold") + parser.add_argument("--iou", type=float, default=0.65, help="Set iou threshold") + parser.add_argument("--max-detections", type=int, default=10, help="Set max detections") + parser.add_argument("--ignore-dash-labels", action=argparse.BooleanOptionalAction, help="Remove '-' labels ") + parser.add_argument("--postprocess", choices=["", "nanodet"], + default=None, help="Run post process of type") + parser.add_argument("-r", "--preserve-aspect-ratio", action=argparse.BooleanOptionalAction, + help="preserve the pixel aspect ratio of the input tensor") + parser.add_argument("--labels", type=str, + help="Path to the labels file") + parser.add_argument("--print-intrinsics", action="store_true", + help="Print JSON network_intrinsics then exit") + return parser.parse_args() + + +if __name__ == "__main__": + args = get_args() + + # This must be called before instantiation of Picamera2 + imx500 = IMX500(args.model) + intrinsics = imx500.network_intrinsics + if not intrinsics: + intrinsics = NetworkIntrinsics() + intrinsics.task = "object detection" + elif intrinsics.task != "object detection": + print("Network is not an object detection task", file=sys.stderr) + exit() + + # Override intrinsics from args + for key, value in vars(args).items(): + if key == 'labels' and value is not None: + with open(value, 'r') as f: + intrinsics.labels = f.read().splitlines() + elif hasattr(intrinsics, key) and value is not None: + setattr(intrinsics, key, value) + + # Defaults + if intrinsics.labels is None: + with open("assets/coco_labels.txt", "r") as f: + intrinsics.labels = f.read().splitlines() + intrinsics.update_with_defaults() + + if args.print_intrinsics: + 
print(intrinsics) + exit() + + picam2 = Picamera2(imx500.camera_num) + config = picam2.create_preview_configuration(controls={"FrameRate": intrinsics.inference_rate}, buffer_count=12) + + imx500.show_network_fw_progress_bar() + picam2.start(config, show_preview=True) + + if intrinsics.preserve_aspect_ratio: + imx500.set_auto_aspect_ratio() + + last_results = None + picam2.pre_callback = draw_detections + while True: + last_results = parse_detections(picam2.capture_metadata()) diff --git a/examples/imx500/imx500_object_detection_demo_mp.py b/examples/imx500/imx500_object_detection_demo_mp.py new file mode 100755 index 00000000..3bce0e6f --- /dev/null +++ b/examples/imx500/imx500_object_detection_demo_mp.py @@ -0,0 +1,194 @@ +import argparse +import multiprocessing +import queue +import sys +import threading +from functools import lru_cache + +import cv2 +import numpy as np + +from picamera2 import MappedArray, Picamera2 +from picamera2.devices import IMX500 +from picamera2.devices.imx500 import (NetworkIntrinsics, + postprocess_nanodet_detection) + + +class Detection: + def __init__(self, coords, category, conf, metadata): + """Create a Detection object, recording the bounding box, category and confidence.""" + self.category = category + self.conf = conf + self.box = imx500.convert_inference_coords(coords, metadata, picam2) + + +def parse_detections(metadata: dict): + """Parse the output tensor into a number of detected objects, scaled to the ISP out.""" + bbox_normalization = intrinsics.bbox_normalization + threshold = args.threshold + iou = args.iou + max_detections = args.max_detections + + np_outputs = imx500.get_outputs(metadata, add_batch=True) + input_w, input_h = imx500.get_input_size() + if np_outputs is None: + return None + if intrinsics.postprocess == "nanodet": + boxes, scores, classes = \ + postprocess_nanodet_detection(outputs=np_outputs[0], conf=threshold, iou_thres=iou, + max_out_dets=max_detections)[0] + from picamera2.devices.imx500.postprocess import scale_boxes + boxes = scale_boxes(boxes, 1, 1, input_h, input_w, False, False) + else: + boxes, scores, classes = np_outputs[0][0], np_outputs[1][0], np_outputs[2][0] + if bbox_normalization: + boxes = boxes / input_h + + boxes = np.array_split(boxes, 4, axis=1) + boxes = zip(*boxes) + + detections = [ + Detection(box, category, score, metadata) + for box, score, category in zip(boxes, scores, classes) + if score > threshold + ] + return detections + + +@lru_cache +def get_labels(): + labels = intrinsics.labels + + if intrinsics.ignore_dash_labels: + labels = [label for label in labels if label and label != "-"] + return labels + + +def draw_detections(jobs): + """Draw the detections for this request onto the ISP output.""" + labels = get_labels() + # Wait for result from child processes in the order submitted. 
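+    # Each queue entry pairs a camera request with the AsyncResult of its
+    # parse_detections job; blocking on get() keeps frames in submission order.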
+ last_detections = [] + while (job := jobs.get()) is not None: + request, async_result = job + detections = async_result.get() + if detections is None: + detections = last_detections + last_detections = detections + with MappedArray(request, 'main') as m: + for detection in detections: + x, y, w, h = detection.box + label = f"{labels[int(detection.category)]} ({detection.conf:.2f})" + + # Calculate text size and position + (text_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + text_x = x + 5 + text_y = y + 15 + + # Create a copy of the array to draw the background with opacity + overlay = m.array.copy() + + # Draw the background rectangle on the overlay + cv2.rectangle(overlay, + (text_x, text_y - text_height), + (text_x + text_width, text_y + baseline), + (255, 255, 255), # Background color (white) + cv2.FILLED) + + alpha = 0.3 + cv2.addWeighted(overlay, alpha, m.array, 1 - alpha, 0, m.array) + + # Draw text on top of the background + cv2.putText(m.array, label, (text_x, text_y), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1) + + # Draw detection box + cv2.rectangle(m.array, (x, y), (x + w, y + h), (0, 255, 0), thickness=2) + + if intrinsics.preserve_aspect_ratio: + b_x, b_y, b_w, b_h = imx500.get_roi_scaled(request) + color = (255, 0, 0) # red + cv2.putText(m.array, "ROI", (b_x + 5, b_y + 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1) + cv2.rectangle(m.array, (b_x, b_y), (b_x + b_w, b_y + b_h), (255, 0, 0, 0)) + + cv2.imshow('IMX500 Object Detection', m.array) + cv2.waitKey(1) + request.release() + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, help="Path of the model", + default="/usr/share/imx500-models/imx500_network_ssd_mobilenetv2_fpnlite_320x320_pp.rpk") + parser.add_argument("--fps", type=int, help="Frames per second") + parser.add_argument("--bbox-normalization", action=argparse.BooleanOptionalAction, help="Normalize bbox") + parser.add_argument("--threshold", type=float, default=0.55, help="Detection threshold") + parser.add_argument("--iou", type=float, default=0.65, help="Set iou threshold") + parser.add_argument("--max-detections", type=int, default=10, help="Set max detections") + parser.add_argument("--ignore-dash-labels", action=argparse.BooleanOptionalAction, help="Remove '-' labels ") + parser.add_argument("--postprocess", choices=["", "nanodet"], + default=None, help="Run post process of type") + parser.add_argument("-r", "--preserve-aspect-ratio", action=argparse.BooleanOptionalAction, + help="preserve the pixel aspect ratio of the input tensor") + parser.add_argument("--labels", type=str, + help="Path to the labels file") + parser.add_argument("--print-intrinsics", action="store_true", + help="Print JSON network_intrinsics then exit") + return parser.parse_args() + + +if __name__ == "__main__": + args = get_args() + + # This must be called before instantiation of Picamera2 + imx500 = IMX500(args.model) + intrinsics = imx500.network_intrinsics + if not intrinsics: + intrinsics = NetworkIntrinsics() + intrinsics.task = "object detection" + elif intrinsics.task != "object detection": + print("Network is not an object detection task", file=sys.stderr) + exit() + + # Override intrinsics from args + for key, value in vars(args).items(): + if key == 'labels' and value is not None: + with open(value, 'r') as f: + intrinsics.labels = f.read().splitlines() + elif hasattr(intrinsics, key) and value is not None: + setattr(intrinsics, key, value) + + # Defaults + if intrinsics.labels is 
None: + with open("assets/coco_labels.txt", "r") as f: + intrinsics.labels = f.read().splitlines() + intrinsics.update_with_defaults() + + if args.print_intrinsics: + print(intrinsics) + exit() + + picam2 = Picamera2(imx500.camera_num) + main = {'format': 'RGB888'} + config = picam2.create_preview_configuration(main, controls={"FrameRate": intrinsics.inference_rate}, buffer_count=12) + + imx500.show_network_fw_progress_bar() + picam2.start(config, show_preview=False) + if intrinsics.preserve_aspect_ratio: + imx500.set_auto_aspect_ratio() + + pool = multiprocessing.Pool(processes=4) + jobs = queue.Queue() + + thread = threading.Thread(target=draw_detections, args=(jobs,)) + thread.start() + + while True: + # The request gets released by handle_results + request = picam2.capture_request() + metadata = request.get_metadata() + if metadata: + async_result = pool.apply_async(parse_detections, (metadata,)) + jobs.put((request, async_result)) + else: + request.release() diff --git a/examples/imx500/imx500_pose_estimation_higherhrnet_demo.py b/examples/imx500/imx500_pose_estimation_higherhrnet_demo.py new file mode 100755 index 00000000..f443308d --- /dev/null +++ b/examples/imx500/imx500_pose_estimation_higherhrnet_demo.py @@ -0,0 +1,117 @@ +import argparse +import sys +import time + +import numpy as np + +from picamera2 import CompletedRequest, MappedArray, Picamera2 +from picamera2.devices.imx500 import IMX500, NetworkIntrinsics +from picamera2.devices.imx500.postprocess import COCODrawer +from picamera2.devices.imx500.postprocess_highernet import \ + postprocess_higherhrnet + +last_boxes = None +last_scores = None +last_keypoints = None +WINDOW_SIZE_H_W = (480, 640) + + +def ai_output_tensor_parse(metadata: dict): + """Parse the output tensor into a number of detected objects, scaled to the ISP out.""" + global last_boxes, last_scores, last_keypoints + np_outputs = imx500.get_outputs(metadata=metadata, add_batch=True) + if np_outputs is not None: + keypoints, scores, boxes = postprocess_higherhrnet(outputs=np_outputs, + img_size=WINDOW_SIZE_H_W, + img_w_pad=(0, 0), + img_h_pad=(0, 0), + detection_threshold=args.detection_threshold, + network_postprocess=True) + + if scores is not None and len(scores) > 0: + last_keypoints = np.reshape(np.stack(keypoints, axis=0), (len(scores), 17, 3)) + last_boxes = [np.array(b) for b in boxes] + last_scores = np.array(scores) + return last_boxes, last_scores, last_keypoints + + +def ai_output_tensor_draw(request: CompletedRequest, boxes, scores, keypoints, stream='main'): + """Draw the detections for this request onto the ISP output.""" + with MappedArray(request, stream) as m: + if boxes is not None and len(boxes) > 0: + drawer.annotate_image(m.array, boxes, scores, + np.zeros(scores.shape), keypoints, args.detection_threshold, + args.detection_threshold, request.get_metadata(), picam2, stream) + + +def picamera2_pre_callback(request: CompletedRequest): + """Analyse the detected objects in the output tensor and draw them on the main output image.""" + boxes, scores, keypoints = ai_output_tensor_parse(request.get_metadata()) + ai_output_tensor_draw(request, boxes, scores, keypoints) + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, help="Path of the model", + default="/usr/share/imx500-models/imx500_network_higherhrnet_coco.rpk") + parser.add_argument("--fps", type=int, help="Frames per second") + parser.add_argument("--detection-threshold", type=float, default=0.3, + help="Post-process detection threshold") 
+ parser.add_argument("--labels", type=str, + help="Path to the labels file") + parser.add_argument("--print-intrinsics", action="store_true", + help="Print JSON network_intrinsics then exit") + return parser.parse_args() + + +def get_drawer(): + categories = intrinsics.labels + categories = [c for c in categories if c and c != "-"] + return COCODrawer(categories, imx500, needs_rescale_coords=False) + + +if __name__ == "__main__": + args = get_args() + + # This must be called before instantiation of Picamera2 + imx500 = IMX500(args.model) + intrinsics = imx500.network_intrinsics + if not intrinsics: + intrinsics = NetworkIntrinsics() + intrinsics.task = "pose estimation" + elif intrinsics.task != "pose estimation": + print("Network is not a pose estimation task", file=sys.stderr) + exit() + + # Override intrinsics from args + for key, value in vars(args).items(): + if key == 'labels' and value is not None: + with open(value, 'r') as f: + intrinsics.labels = f.read().splitlines() + elif hasattr(intrinsics, key) and value is not None: + setattr(intrinsics, key, value) + + # Defaults + if intrinsics.inference_rate is None: + intrinsics.inference_rate = 10 + if intrinsics.labels is None: + with open("assets/coco_labels.txt", "r") as f: + intrinsics.labels = f.read().splitlines() + intrinsics.update_with_defaults() + + if args.print_intrinsics: + print(intrinsics) + exit() + + drawer = get_drawer() + + picam2 = Picamera2(imx500.camera_num) + config = picam2.create_preview_configuration(controls={'FrameRate': intrinsics.inference_rate}, buffer_count=12) + + imx500.show_network_fw_progress_bar() + picam2.start(config, show_preview=True) + imx500.set_auto_aspect_ratio() + picam2.pre_callback = picamera2_pre_callback + + while True: + time.sleep(0.5) diff --git a/examples/imx500/imx500_segmentation_demo.py b/examples/imx500/imx500_segmentation_demo.py new file mode 100755 index 00000000..4f3c1e77 --- /dev/null +++ b/examples/imx500/imx500_segmentation_demo.py @@ -0,0 +1,101 @@ +import argparse +import sys +import time +from typing import Dict + +import numpy as np + +from picamera2 import CompletedRequest, Picamera2 +from picamera2.devices import IMX500 +from picamera2.devices.imx500 import NetworkIntrinsics + +COLOURS = np.loadtxt("assets/colours.txt") + + +def create_and_draw_masks(request: CompletedRequest): + """Create masks from the output tensor and draw them on the main output image.""" + masks = create_masks(request) + draw_masks(masks) + + +def create_masks(request: CompletedRequest) -> Dict[int, np.ndarray]: + """Create masks from the output tensor, scaled to the ISP out.""" + res = {} + np_outputs = imx500.get_outputs(metadata=request.get_metadata()) + input_w, input_h = imx500.get_input_size() + if np_outputs is None: + return res + mask = np_outputs[0] + found_indices = np.unique(mask) + + for i in found_indices: + if i == 0: + continue + output_shape = [input_h, input_w, 4] + colour = [(0, 0, 0, 0), COLOURS[int(i)]] + colour[1][3] = 150 # update the alpha value here, to save setting it later + overlay = np.array(mask == i, dtype=np.uint8) + overlay = np.array(colour)[overlay].reshape(output_shape).astype(np.uint8) + # No need to resize the overlay, it will be stretched to the output window. 
+ res[i] = overlay + return res + + +def draw_masks(masks: Dict[int, np.ndarray]): + """Draw the masks for this request onto the ISP output.""" + if not masks: + return + input_w, input_h = imx500.get_input_size() + output_shape = [input_h, input_w, 4] + overlay = np.zeros(output_shape, dtype=np.uint8) + if masks: + for v in masks.values(): + overlay += v + # Set Alphas and overlay + picam2.set_overlay(overlay) + + +def get_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, help="Path of the model", + default="/usr/share/imx500-models/imx500_network_deeplabv3plus.rpk") + parser.add_argument("--fps", type=int, help="Frames per second") + parser.add_argument("--print-intrinsics", action="store_true", + help="Print JSON network_intrinsics then exit") + return parser.parse_args() + + +if __name__ == "__main__": + args = get_args() + + # This must be called before instantiation of Picamera2 + imx500 = IMX500(args.model) + intrinsics = imx500.network_intrinsics + if not intrinsics: + intrinsics = NetworkIntrinsics() + intrinsics.task = "segmentation" + elif intrinsics.task != "segmentation": + print("Network is not a segmentation task", file=sys.stderr) + exit() + + # Override intrinsics from args + for key, value in vars(args).items(): + if hasattr(intrinsics, key) and value is not None: + setattr(intrinsics, key, value) + + # Defaults + intrinsics.update_with_defaults() + + if args.print_intrinsics: + print(intrinsics) + exit() + + picam2 = Picamera2(imx500.camera_num) + config = picam2.create_preview_configuration(controls={'FrameRate': intrinsics.inference_rate}, buffer_count=12) + imx500.show_network_fw_progress_bar() + picam2.start(config, show_preview=True) + picam2.pre_callback = create_and_draw_masks + + while True: + time.sleep(0.5) diff --git a/picamera2/devices/__init__.py b/picamera2/devices/__init__.py index a8ccb03b..5ef65f35 100644 --- a/picamera2/devices/__init__.py +++ b/picamera2/devices/__init__.py @@ -3,4 +3,5 @@ from .hailo import Hailo except ModuleNotFoundError: pass +from .imx500 import IMX500 from .imx708 import IMX708 diff --git a/picamera2/devices/imx500/__init__.py b/picamera2/devices/imx500/__init__.py new file mode 100644 index 00000000..2cabd5ee --- /dev/null +++ b/picamera2/devices/imx500/__init__.py @@ -0,0 +1,6 @@ +from .imx500 import IMX500, NetworkIntrinsics +from .postprocess_efficientdet_lite0 import \ + postprocess_efficientdet_lite0_detection +from .postprocess_nanodet import postprocess_nanodet_detection +from .postprocess_yolov5 import postprocess_yolov5_detection +from .postprocess_yolov8 import postprocess_yolov8_detection diff --git a/picamera2/devices/imx500/imx500.py b/picamera2/devices/imx500/imx500.py new file mode 100644 index 00000000..24748497 --- /dev/null +++ b/picamera2/devices/imx500/imx500.py @@ -0,0 +1,709 @@ +import ctypes +import fcntl +import io +import json +import multiprocessing +import os +import struct +import sys +import time +from typing import List, Optional + +import jsonschema +import numpy as np +from libarchive.read import fd_reader +from libcamera import Rectangle, Size +from tqdm import tqdm +from v4l2 import (VIDIOC_S_CTRL, VIDIOC_S_EXT_CTRLS, v4l2_control, + v4l2_ext_control, v4l2_ext_controls) + +from picamera2 import CompletedRequest, Picamera2 + +NETWORK_NAME_LEN = 64 +MAX_NUM_TENSORS = 8 +MAX_NUM_DIMENSIONS = 8 + +FW_LOADER_STAGE = 0 +FW_MAIN_STAGE = 1 +FW_NETWORK_STAGE = 2 + +NETWORK_FW_FD_CTRL_ID = 0x00982901 +ROI_CTRL_ID = 0x00982900 + + 
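+# V4L2 control IDs: NETWORK_FW_FD_CTRL_ID passes the network firmware file
+# descriptor to the driver; ROI_CTRL_ID sets the inference region of interest.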
+# struct OutputTensorInfo from libcamera +class OutputTensorInfo(ctypes.LittleEndianStructure): + _fields_ = [ + ('tensor_data_num', ctypes.c_uint32), + ('num_dimensions', ctypes.c_uint32), + ('size', ctypes.c_uint16 * MAX_NUM_DIMENSIONS), + ] + + +# struct CnnOutputTensorInfoExported from libcamera +class CnnOutputTensorInfoExported(ctypes.LittleEndianStructure): + _fields_ = [ + ('network_name', ctypes.c_char * NETWORK_NAME_LEN), + ('num_tensors', ctypes.c_uint32), + ('info', OutputTensorInfo * MAX_NUM_TENSORS) + ] + + +class NetworkIntrinsics: + def __init__(self, val=None): + self.__intrinsics: Optional[dict] = None + self.__schema = { + "$schema": "https://json-schema.org/draft-07/schema", + "title": "network_intrinsics", + "type": "object", + "properties": { + "task": { + "type": "string", + "enum": ["classification", "object detection", "pose estimation", "segmentation"], + "description": "Network task", + }, + "inference_rate": {"type": "number", "minimum": 0}, + "cpu": { + "type": "object", + "properties": { + "bbox_normalization": {"type": "boolean"}, + "softmax": {"type": "boolean"}, + "post_processing": {"type": "string"}, + }, + }, + "input_aspect_ratio": { + "type": "object", + "properties": { + "width": {"type": "integer", "exclusiveMinimum": 0}, + "height": {"type": "integer", "exclusiveMinimum": 0}, + }, + "required": ["width", "height"], + }, + "classes": { + "type": "object", + "properties": { + "labels": {"type": "array", "items": {"type": "string"}}, + "ignore_undefined": {"type": "boolean"}, + }, + }, + }, + } + if val is not None: + jsonschema.validate(val, self.__schema) + self.__intrinsics = val + + self.__defaults = {'inference_rate': 30.0} + jsonschema.validate(self.__defaults, self.__schema | {'additionalProperties': False}) + + @property + def intrinsics(self) -> Optional[dict]: + return self.__intrinsics + + @intrinsics.setter + def intrinsics(self, val): + jsonschema.validate(val, self.__schema) + self.__intrinsics = val + + def __repr__(self): + return json.dumps(self.__intrinsics) if self.__intrinsics else "" + + def __top_level_validated_insert(self, val: dict): + jsonschema.validate(val, self.__schema | {'additionalProperties': False}) + self.__intrinsics = self.__intrinsics | val if self.__intrinsics else val + + def __intrinsics_has_key(self, key: str) -> bool: + return key in self.__intrinsics if self.__intrinsics else False + + def __intrinsics_get_key(self, key, default=None): + return self.__intrinsics.get(key, default) if self.__intrinsics else default + + def update_with_defaults(self): + # Updates intrinsics with default settings (but does not overwrite) + if not self.__intrinsics: + self.__intrinsics = {} + self.__intrinsics = self.__defaults | self.__intrinsics + + @property + def task(self) -> Optional[str]: + return self.__intrinsics_get_key('task') + + @task.setter + def task(self, val: str): + self.__top_level_validated_insert({'task': val}) + + @property + def inference_rate(self) -> Optional[float]: + return self.__intrinsics_get_key('inference_rate') + + @inference_rate.setter + def inference_rate(self, val: float): + if val < 0: + if self.__intrinsics is not None: + self.__intrinsics.pop('inference_rate', None) + else: + self.__top_level_validated_insert({'inference_rate': val}) + + @property + def fps(self) -> Optional[float]: + # @deprecated("Prefer inference_rate") + return self.inference_rate + + @fps.setter + def fps(self, val: Optional[float]): + # @deprecated("Prefer inference_rate") + self.inference_rate = val + + def 
__get_cpu(self, key: str): + return self.__intrinsics['cpu'].get(key, None) if self.__intrinsics_has_key('cpu') else None + + def __set_cpu(self, val: dict): + jsonschema.validate({'cpu': val}, self.__schema | {'additionalProperties': False}) + cpu = self.__intrinsics_get_key('cpu', {}) | val + if self.__intrinsics: + self.__intrinsics['cpu'] = cpu + else: + self.__intrinsics = {'cpu': cpu} + + @property + def bbox_normalization(self) -> Optional[bool]: + return self.__get_cpu('bbox_normalization') + + @bbox_normalization.setter + def bbox_normalization(self, val: Optional[bool]): + if val is None: + return + + if val: + self.__set_cpu({'bbox_normalization': val}) + elif self.__intrinsics_has_key('cpu'): + self.__intrinsics['cpu'].pop('bbox_normalization', None) + + if self.__intrinsics_has_key('cpu') and len(self.__intrinsics['cpu']) == 0: + self.__intrinsics.pop('cpu') + + @property + def softmax(self) -> Optional[bool]: + return self.__get_cpu('softmax') + + @softmax.setter + def softmax(self, val: Optional[bool]): + if val is None: + return + + if val: + self.__set_cpu({'softmax': val}) + elif self.__intrinsics_has_key('cpu'): + self.__intrinsics['cpu'].pop('softmax', None) + + if self.__intrinsics_has_key('cpu') and len(self.__intrinsics['cpu']) == 0: + self.__intrinsics.pop('cpu') + + @property + def postprocess(self) -> Optional[str]: + return self.__get_cpu('post_processing') + + @postprocess.setter + def postprocess(self, val: str): + if val != "": + self.__set_cpu({'post_processing': val}) + elif self.__intrinsics_has_key('cpu'): + self.__intrinsics['cpu'].pop('post_processing', None) + + if self.__intrinsics_has_key('cpu') and len(self.__intrinsics['cpu']) == 0: + self.__intrinsics.pop('cpu') + + @property + def preserve_aspect_ratio(self) -> Optional[bool]: + if not self.__intrinsics_has_key('input_aspect_ratio'): + return None + ar = self.__intrinsics['input_aspect_ratio'] + return ar['width'] == ar['height'] + + @preserve_aspect_ratio.setter + def preserve_aspect_ratio(self, val: Optional[bool]): + if val is None: + return + + if val: + iar = {'input_aspect_ratio': {'width': 1, 'height': 1}} + self.__top_level_validated_insert(iar) + elif self.__intrinsics_has_key('input_aspect_ratio'): + self.__intrinsics.pop('input_aspect_ratio') + + @property + def labels(self) -> Optional[List[str]]: + return self.__intrinsics['classes'].get('labels', None) if self.__intrinsics_has_key('classes') else None + + @labels.setter + def labels(self, val: List[str]): + if len(val) != 0: + classes = {'labels': val} + jsonschema.validate({'classes': classes}, self.__schema | {'additionalProperties': False}) + + classes = self.__intrinsics_get_key('classes', {}) | classes + if self.__intrinsics: + self.__intrinsics['classes'] = classes + else: + self.__intrinsics = {'classes': classes} + elif self.__intrinsics_has_key('classes'): + self.__intrinsics['classes'].pop('labels', None) + if len(self.__intrinsics['classes']) == 0: + self.__intrinsics.pop('classes') + + @property + def ignore_dash_labels(self) -> Optional[bool]: + return self.__intrinsics['classes'].get('ignore_undefined', None) if self.__intrinsics_has_key('classes') else None + + @ignore_dash_labels.setter + def ignore_dash_labels(self, val: Optional[bool]): + if val is None: + return + + if val: + iu = {'ignore_undefined': val} + jsonschema.validate({'classes': iu}, self.__schema | {'additionalProperties': False}) + + classes = {'classes': self.__intrinsics_get_key('classes', {}) | iu} + self.__intrinsics = self.__intrinsics | classes 
if self.__intrinsics else classes
+        elif self.__intrinsics_has_key('classes'):
+            self.__intrinsics['classes'].pop('ignore_undefined', None)
+            if len(self.__intrinsics['classes']) == 0:
+                self.__intrinsics.pop('classes')
+
+
+class IMX500:
+    def __init__(self, network_file: str, camera_id: str = ''):
+        self.device_fd = None
+        self.fw_progress = None
+        self.fw_progress_chunk = None
+        self.__cfg = {'network_file': network_file, 'input_tensor': {}}
+
+        imx500_device_id = None
+        spi_device_id = None
+        for i in range(32):
+            test_dir = f'/sys/class/video4linux/v4l-subdev{i}/device'
+            module_dir = f'{test_dir}/driver/module'
+            id_dir = f'{test_dir}/of_node'
+            if os.path.exists(module_dir) and os.path.islink(module_dir) and os.path.islink(id_dir) \
+                    and 'imx500' in os.readlink(module_dir):
+                if camera_id == '' or (camera_id in os.readlink(id_dir)):
+                    self.device_fd = open(f'/dev/v4l-subdev{i}', 'rb+', buffering=0)
+                    imx500_device_id = os.readlink(test_dir).split('/')[-1]
+                    spi_device_id = imx500_device_id.replace('001a', '0040')
+                    camera_info = Picamera2.global_camera_info()
+                    self.__camera_num = next((c['Num'] for c in camera_info if c['Model'] == 'imx500'
+                                              and c['Id'] in os.readlink(id_dir)))
+                    break
+
+        if self.device_fd is None:
+            raise RuntimeError('IMX500: Requested camera dev-node not found')
+
+        # Progress status specific debugfs entries.
+        if imx500_device_id:
+            self.fw_progress = open(f'/sys/kernel/debug/imx500-fw:{imx500_device_id}/fw_progress', 'r')
+        if spi_device_id:
+            self.fw_progress_chunk = open(f'/sys/kernel/debug/rp2040-spi:{spi_device_id}/transfer_progress', 'r')
+
+        if self.config['network_file'] != '':
+            self.__set_network_firmware(os.path.abspath(self.config['network_file']))
+            self.__ni_from_network(os.path.abspath(self.config['network_file']))
+
+        # Fall back to default normalisation parameters for anything the network
+        # file did not provide.
+        if 'norm_val' not in self.__cfg['input_tensor']:
+            self.__cfg['input_tensor']['norm_val'] = [-2048, -2048, -2048]
+        if 'norm_shift' not in self.__cfg['input_tensor']:
+            self.__cfg['input_tensor']['norm_shift'] = [4, 4, 4]
+        if 'div_val' not in self.__cfg['input_tensor']:
+            self.__cfg['input_tensor']['div_val'] = [1024, 1024, 1024]
+        if 'div_shift' not in self.__cfg['input_tensor']:
+            self.__cfg['input_tensor']['div_shift'] = 6
+
+        full_sensor = self.__get_full_sensor_resolution()
+        self.set_inference_roi_abs(full_sensor.to_tuple())
+
+    @staticmethod
+    def __get_full_sensor_resolution():
+        """Full sensor resolution as a Rectangle object."""
+        return Rectangle(0, 0, 4056, 3040)
+
+    def __del__(self):
+        if self.device_fd:
+            self.device_fd.close()
+
+    @property
+    def camera_num(self):
+        return self.__camera_num
+
+    @property
+    def config(self) -> dict:
+        return self.__cfg
+
+    @property
+    def network_intrinsics(self) -> Optional[NetworkIntrinsics]:
+        return self.__cfg.get('intrinsics', None)
+
+    def convert_inference_coords(self, coords: tuple, metadata: dict, picam2: Picamera2, stream='main') -> tuple:
+        """Convert relative inference coordinates into the output image coordinate space."""
+        isp_output_size = Size(*picam2.camera_configuration()[stream]['size'])
+        sensor_output_size = Size(*picam2.camera_configuration()['raw']['size'])
+        scaler_crop = Rectangle(*metadata['ScalerCrop'])
+
+        y0, x0, y1, x1 = coords
+        full_sensor = self.__get_full_sensor_resolution()
+        width, height = full_sensor.size.to_tuple()
+        obj = Rectangle(
+            *np.maximum(
+                np.array([x0 * width, y0 * height, (x1 - x0) * width, (y1 - y0) * height]),
+                0,
+            ).astype(np.int32)
+        )
+        out = self.__get_obj_scaled(obj, isp_output_size, scaler_crop, sensor_output_size)
+        return out.to_tuple()
+
+    def 
get_fw_upload_progress(self, stage_req) -> tuple: + """Returns the current progress of the fw upload in the form of (current, total).""" + progress_block = 0 + progress_chunk = 0 + size = 0 + stage = 0 + + if self.fw_progress: + self.fw_progress.seek(0) + progress = self.fw_progress.readline().strip().split() + stage = int(progress[0]) + progress_block = int(progress[1]) + size = int(progress[2]) + + if self.fw_progress_chunk: + self.fw_progress_chunk.seek(0) + progress_chunk = int(self.fw_progress_chunk.readline().strip()) + + if stage == stage_req: + return (min(progress_block + progress_chunk, size), size) + else: + return (0, 0) + + def show_network_fw_progress_bar(self): + p = multiprocessing.Process(target=self.__do_progress_bar, + args=(FW_NETWORK_STAGE, 'Network Firmware Upload')) + p.start() + p.join(0) + + def __do_progress_bar(self, stage_req, title): + with tqdm(unit='bytes', unit_scale=True, unit_divisor=1024, desc=title, leave=True) as t: + last_update = 0 + while True: + current, total = self.get_fw_upload_progress(stage_req) + if total: + t.total = total + t.update(current - last_update) + last_update = current + if current > 0.95 * total: + t.update(total - last_update) + break + time.sleep(0.5) + + def get_roi_scaled(self, request: CompletedRequest, stream="main") -> tuple: + """Get the region of interest (ROI) in output image coordinates space.""" + picam2 = request.picam2 + isp_output_size = self.get_isp_output_size(picam2, stream) + sensor_output_size = self.get_isp_output_size(picam2, 'raw') + scaler_crop = Rectangle(*request.get_metadata()['ScalerCrop']) + obj = self.__get_full_sensor_resolution() + roi = self.__get_obj_scaled(obj, isp_output_size, scaler_crop, sensor_output_size) + return roi.to_tuple() + + @staticmethod + def get_isp_output_size(picam2, stream="main") -> tuple: + return Size(*picam2.camera_configuration()[stream]['size']) + + def __get_obj_scaled(self, obj, isp_output_size, scaler_crop, sensor_output_size) -> Rectangle: + """Scale the object coordinates based on the camera configuration and sensor properties.""" + full_sensor = self.__get_full_sensor_resolution() + width, height = full_sensor.size.to_tuple() + sensor_crop = scaler_crop.scaled_by(sensor_output_size, full_sensor.size) + + # Make sure the object is bound to the user requested ROI. 
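+        # The full mapping: full-sensor coordinates are scaled into the raw sensor
+        # output, bounded to the current ScalerCrop, translated to the crop origin,
+        # and finally scaled into the requested ISP output stream.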
+        if 'roi' in self.config and self.config['roi'] != Rectangle(0, 0, 0, 0):
+            obj = obj.bounded_to(self.config['roi'])
+
+        obj_sensor = obj.scaled_by(sensor_output_size, Size(width, height))
+        obj_bound = obj_sensor.bounded_to(sensor_crop)
+        obj_translated = obj_bound.translated_by(-sensor_crop.topLeft)
+        obj_scaled = obj_translated.scaled_by(isp_output_size, sensor_crop.size)
+        return obj_scaled
+
+    def get_input_size(self) -> tuple:
+        """Get the model input tensor size as (width, height)."""
+        return self.config['input_tensor_size']
+
+    def input_tensor_image(self, input_tensor):
+        """Convert input tensor in planar format to interleaved RGB."""
+        width = self.config['input_tensor']['width']
+        height = self.config['input_tensor']['height']
+        r1 = np.array(input_tensor, dtype=np.uint8).astype(np.int32).reshape((3,) + (height, width))
+        r1 = r1[(2, 1, 0), :, :]
+        norm_val = self.config['input_tensor']['norm_val']
+        norm_shift = self.config['input_tensor']['norm_shift']
+        div_val = self.config['input_tensor']['div_val']
+        div_shift = self.config['input_tensor']['div_shift']
+        for i in [0, 1, 2]:
+            r1[i] = ((((r1[i] << norm_shift[i]) - norm_val[i]) << div_shift) // div_val[i]) & 0xff
+
+        return np.transpose(r1, (1, 2, 0)).astype(np.uint8)
+
+    def get_outputs(self, metadata: dict, add_batch=False) -> Optional[list[np.ndarray]]:
+        """Get the model outputs."""
+        output_tensor = metadata.get('CnnOutputTensor')
+        if not output_tensor:
+            return None
+
+        np_output = np.fromiter(output_tensor, dtype=np.float32)
+        output_shapes = self.get_output_shapes(metadata)
+        offset = 0
+        outputs = []
+        for tensor_shape in output_shapes:
+            size = np.prod(tensor_shape)
+            reshaped_tensor = np_output[offset:offset + size].reshape(tensor_shape, order='F')
+            if add_batch:
+                reshaped_tensor = np.expand_dims(reshaped_tensor, 0)
+            outputs.append(reshaped_tensor)
+            offset += size
+        return outputs
+
+    def get_output_shapes(self, metadata: dict) -> list[tuple[int]]:
+        """Get the model output shapes, or an empty list if the network has no outputs."""
+        output_tensor_info = metadata.get('CnnOutputTensorInfo')
+        if not output_tensor_info:
+            return []
+        output_tensor_info = self.__get_output_tensor_info(output_tensor_info)['info']
+        return [o['size'] for o in output_tensor_info]
+
+    def set_inference_roi_abs(self, roi: tuple):
+        """
+        Set the absolute inference image crop.
+
+        Specify an absolute region of interest in the form of a (left, top, width, height) crop for the input
+        inference image. The coordinates are based on the full sensor resolution.
+        """
+        roi = Rectangle(*roi)
+        roi = roi.bounded_to(self.__get_full_sensor_resolution())
+
+        r = (ctypes.c_uint32 * 4)()
+        r[0] = roi.x
+        r[1] = roi.y
+        r[2] = roi.width
+        r[3] = roi.height
+
+        c = (v4l2_ext_control * 1)()
+        c[0].p_u32 = r
+        c[0].id = ROI_CTRL_ID
+        c[0].size = 16
+
+        ctrl = v4l2_ext_controls()
+        ctrl.count = 1
+        ctrl.controls = c
+
+        try:
+            fcntl.ioctl(self.device_fd, VIDIOC_S_EXT_CTRLS, ctrl)
+            self.__cfg['roi'] = roi
+        except OSError as err:
+            print(f'IMX500: Unable to set ROI control in the device driver: {err}')
+
+    def set_inference_aspect_ratio(self, aspect_ratio: tuple):
+        """
+        Set the aspect ratio of the inference image.
+
+        Specify a pixel aspect ratio needed for the input inference image relative to the full sensor resolution.
+        This simply calculates an ROI based on a centre crop and calls set_inference_roi_abs().
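+
+        For example, set_inference_aspect_ratio((1, 1)) requests the largest centred
+        square crop of the sensor, and set_auto_aspect_ratio() below passes the input
+        tensor size here to obtain a crop matching the network's native aspect ratio.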
+        """
+        f = self.__get_full_sensor_resolution()
+        r = f.size.bounded_to_aspect_ratio(Size(aspect_ratio[0], aspect_ratio[1]))
+        r = r.centered_to(f.center).enclosed_in(f)
+        self.set_inference_roi_abs(r.to_tuple())
+
+    def set_auto_aspect_ratio(self):
+        """Set the inference image crop to preserve the input tensor aspect ratio."""
+        self.set_inference_aspect_ratio(self.config['input_tensor_size'])
+
+    def __get_output_tensor_info(self, tensor_info) -> dict:
+        """Return the network name along with a list of output tensor parameters."""
+        if type(tensor_info) not in [bytes, bytearray]:
+            tensor_info = bytes(tensor_info)
+
+        size = ctypes.sizeof(CnnOutputTensorInfoExported)
+        if len(tensor_info) != size:
+            raise ValueError(f'tensor info length {len(tensor_info)} does not match expected size {size}')
+
+        # Create an instance of the struct and copy data into it
+        parsed = CnnOutputTensorInfoExported()
+        ctypes.memmove(ctypes.addressof(parsed), tensor_info, size)
+
+        result = {
+            'network_name': parsed.network_name.decode('utf-8').strip('\x00'),
+            'num_tensors': parsed.num_tensors,
+            'info': []
+        }
+
+        for t in parsed.info[0:parsed.num_tensors]:
+            info = {
+                'tensor_data_num': t.tensor_data_num,
+                'num_dimensions': t.num_dimensions,
+                'size': list(t.size)[0:t.num_dimensions],
+            }
+            result['info'].append(info)
+
+        return result
+
+    def __get_input_tensor_info(self, tensor_info) -> tuple[str, int, int, int]:
+        """Return the input tensor parameters in the form (network_name, width, height, num_channels)."""
+        tensor_fmt = f'{NETWORK_NAME_LEN}sIII'
+
+        if type(tensor_info) not in [bytes, bytearray]:
+            tensor_info = bytes(tensor_info)
+
+        network_name, width, height, num_channels = struct.unpack(tensor_fmt, tensor_info)
+        network_name = network_name.decode('utf-8').rstrip('\0')
+        return (network_name, width, height, num_channels)
+
+    @staticmethod
+    def get_kpi_info(metadata: dict) -> Optional[tuple[float, float]]:
+        """Return the KPI parameters in the form (dnn_runtime, dsp_runtime)."""
+        kpi_info = metadata.get('CnnKpiInfo')
+        if kpi_info is None:
+            return None
+        if type(kpi_info) not in [bytes, bytearray]:
+            kpi_info = bytes(kpi_info)
+
+        dnn_runtime, dsp_runtime = struct.unpack('II', kpi_info)
+        return dnn_runtime / 1000, dsp_runtime / 1000
+
+    def __set_network_firmware(self, network_filename: str):
+        """Provide a firmware rpk file to upload to the IMX500. This must be called before Picamera2 is configured."""
+        if not os.path.isfile(network_filename):
+            raise RuntimeError(f'Firmware file {network_filename} does not exist.')
+
+        fd = os.open(network_filename, os.O_RDONLY)
+        if fd:
+            ctrl = v4l2_control()
+            ctrl.id = NETWORK_FW_FD_CTRL_ID
+            ctrl.value = fd
+
+            try:
+                fcntl.ioctl(self.device_fd, VIDIOC_S_CTRL, ctrl)
+                print('\n------------------------------------------------------------------------------------------------------------------\n'  # noqa
+                      'NOTE: Loading network firmware onto the IMX500 can take several minutes, please do not close down the application.'
# noqa + '\n------------------------------------------------------------------------------------------------------------------\n', file=sys.stderr) # noqa + except OSError as err: + raise RuntimeError(f'IMX500: Unable to set network firmware {network_filename}: {err}') + finally: + os.close(fd) + + def __ni_from_network(self, network_filename: str): + """Extracts 'network_info.txt' from CPIO-archive appended to the network rpk.""" + with open(network_filename, 'rb') as fp: + fw = memoryview(fp.read()) + + # Iterate through network firmware discarding blocks + cpio_offset = 0 + while True: + # Parse header (+ current block size) + (magic, size) = struct.unpack('>4sI', fw[:8]) + if not magic == b'9464': + break + fw = fw[size + 60:] + # Ensure footer is as expected + (magic,) = struct.unpack('4s', fw[:4]) + if not magic == b'3695': + raise RuntimeError(f'No matching footer found in firmware file {network_filename}') + fw = fw[4:] + cpio_offset += size + 64 + + cpio_fd = os.open(network_filename, os.O_RDONLY) + os.lseek(cpio_fd, cpio_offset, os.SEEK_SET) + + with fd_reader(cpio_fd) as archive: + for entry in archive: + if 'network_info.txt' == str(entry): + self.__cfg['network_info_raw'] = b''.join(entry.get_blocks()) + elif 'network_intrinsics' == str(entry): + self.__cfg['intrinsics'] = NetworkIntrinsics(json.loads(b''.join(entry.get_blocks()))) + + os.close(cpio_fd) + + if 'network_info_raw' not in self.__cfg: + return + + res = {} + buf = io.StringIO(self.__cfg['network_info_raw'].decode('ascii')) + for line in buf: + key, value = line.strip().split('=') + if key == 'networkID': + nid: int = 0 + for idx, x in enumerate(value): + nid |= (ord(x) - ord('0')) << (20 - idx * 4) + res[key] = nid + if key == 'apParamSize': + res[key] = int(value) + if key == 'networkNum': + res[key] = int(value) + + res['network'] = {} + networks = self.__cfg['network_info_raw'].decode('ascii').split('networkOrdinal=')[1:] + for nw in networks: + buf = io.StringIO(nw) + nw_idx = int(buf.readline()) + nw_properties = {} + for line in buf: + key, value = line.strip().split('=') + nw_properties[key] = value + res['network'][nw_idx] = nw_properties + + if len(res['network']) != res['networkNum']: + raise RuntimeError('Insufficient networkNum settings in network_info.txt') + + self.__cfg['network_info'] = res + + # Extract some input tensor config params + self.__cfg['input_tensor']['width'] = int(res['network'][0]['inputTensorWidth']) + self.__cfg['input_tensor']['height'] = int(res['network'][0]['inputTensorHeight']) + self.__cfg['input_tensor_size'] = (self.config['input_tensor']['width'], + self.config['input_tensor']['height']) + + input_format = self.__cfg['network_info']['network'][0]['inputTensorFormat'] + inputTensorNorm_K03 = int(self.__cfg['network_info']['network'][0]['inputTensorNorm_K03'], 0) + inputTensorNorm_K13 = int(self.__cfg['network_info']['network'][0]['inputTensorNorm_K13'], 0) + inputTensorNorm_K23 = int(self.__cfg['network_info']['network'][0]['inputTensorNorm_K23'], 0) + inputTensorNorm_K00 = int(self.__cfg['network_info']['network'][0]['inputTensorNorm_K00'], 0) + inputTensorNorm_K22 = int(self.__cfg['network_info']['network'][0]['inputTensorNorm_K22'], 0) + inputTensorNorm_K02 = int(self.__cfg['network_info']['network'][0]['inputTensorNorm_K02'], 0) + inputTensorNorm_K20 = int(self.__cfg['network_info']['network'][0]['inputTensorNorm_K20'], 0) + inputTensorNorm_K11 = int(self.__cfg['network_info']['network'][0]['inputTensorNorm_K11'], 0) + + self.__cfg['input_tensor']['input_format'] = 
input_format
+
+        # The inputTensorNorm registers hold two's-complement values (13 bits for
+        # the norm terms, 12 bits for the div terms); sign-extend them here.
+        if input_format == 'RGB' or input_format == 'BGR':
+            norm_val_0 = \
+                inputTensorNorm_K03 if ((inputTensorNorm_K03 >> 12) & 1) == 0 else -((~inputTensorNorm_K03 + 1) & 0x1fff)
+            norm_val_1 = \
+                inputTensorNorm_K13 if ((inputTensorNorm_K13 >> 12) & 1) == 0 else -((~inputTensorNorm_K13 + 1) & 0x1fff)
+            norm_val_2 = \
+                inputTensorNorm_K23 if ((inputTensorNorm_K23 >> 12) & 1) == 0 else -((~inputTensorNorm_K23 + 1) & 0x1fff)
+            norm_val = [norm_val_0, norm_val_1, norm_val_2]
+            self.__cfg['input_tensor']['norm_val'] = norm_val
+            norm_shift = [4, 4, 4]
+            self.__cfg['input_tensor']['norm_shift'] = norm_shift
+            if input_format == 'RGB':
+                div_val_0 = \
+                    inputTensorNorm_K00 if ((inputTensorNorm_K00 >> 11) & 1) == 0 else -((~inputTensorNorm_K00 + 1) & 0x0fff)
+                div_val_2 = \
+                    inputTensorNorm_K22 if ((inputTensorNorm_K22 >> 11) & 1) == 0 else -((~inputTensorNorm_K22 + 1) & 0x0fff)
+            else:
+                div_val_0 = \
+                    inputTensorNorm_K02 if ((inputTensorNorm_K02 >> 11) & 1) == 0 else -((~inputTensorNorm_K02 + 1) & 0x0fff)
+                div_val_2 = \
+                    inputTensorNorm_K20 if ((inputTensorNorm_K20 >> 11) & 1) == 0 else -((~inputTensorNorm_K20 + 1) & 0x0fff)
+            div_val_1 = \
+                inputTensorNorm_K11 if ((inputTensorNorm_K11 >> 11) & 1) == 0 else -((~inputTensorNorm_K11 + 1) & 0x0fff)
+            self.__cfg['input_tensor']['div_val'] = [div_val_0, div_val_1, div_val_2]
+            self.__cfg['input_tensor']['div_shift'] = 6
diff --git a/picamera2/devices/imx500/postprocess.py b/picamera2/devices/imx500/postprocess.py
new file mode 100644
index 00000000..ee1c825e
--- /dev/null
+++ b/picamera2/devices/imx500/postprocess.py
@@ -0,0 +1,414 @@
+"""
+This code is based on multiple sources:
+
+https://github.com/rbgirshick/fast-rcnn
+https://github.com/ultralytics/ultralytics
+https://github.com/see--/keras-centernet
+https://github.com/stefanopini/simple-HigherHRNet
+"""
+
+from enum import Enum
+from typing import List
+
+import cv2
+import numpy as np
+
+from picamera2 import Picamera2
+
+
+def nms(dets: np.ndarray, scores: np.ndarray, iou_thres: float = 0.55, max_out_dets: int = 50) -> List[int]:
+    """
+    Perform Non-Maximum Suppression (NMS) on detected bounding boxes.
+
+    Args:
+        dets (np.ndarray): Array of bounding box coordinates of shape (N, 4) representing [y1, x1, y2, x2].
+        scores (np.ndarray): Array of confidence scores associated with each bounding box.
+        iou_thres (float, optional): IoU threshold for NMS. Default is 0.55.
+        max_out_dets (int, optional): Maximum number of output detections to keep. Default is 50.
+
+    Returns:
+        List[int]: Indices of the bounding boxes to keep after NMS.
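+
+    Example (illustrative values):
+        dets = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]])
+        keep = nms(dets, np.array([0.9, 0.8, 0.7]), iou_thres=0.5)
+        # keep -> [0, 2]: box 1 overlaps box 0 with IoU ~= 0.70 and is suppressed.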
+
+    """
+    y1, x1 = dets[:, 0], dets[:, 1]
+    y2, x2 = dets[:, 2], dets[:, 3]
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = scores.argsort()[::-1]
+
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        xx1 = np.maximum(x1[i], x1[order[1:]])
+        yy1 = np.maximum(y1[i], y1[order[1:]])
+        xx2 = np.minimum(x2[i], x2[order[1:]])
+        yy2 = np.minimum(y2[i], y2[order[1:]])
+
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+        inds = np.where(ovr <= iou_thres)[0]
+        order = order[inds + 1]
+
+    return keep[:max_out_dets]
+
+
+def combined_nms(batch_boxes, batch_scores, iou_thres: float = 0.65, conf: float = 0.55, max_out_dets: int = 50):
+    nms_results = []
+    for boxes, scores in zip(batch_boxes, batch_scores):
+        xc = np.argmax(scores, 1)
+        xs = np.amax(scores, 1)
+        x = np.concatenate([boxes, np.expand_dims(xs, 1), np.expand_dims(xc, 1)], 1)
+
+        xi = xs > conf
+        x = x[xi]
+
+        x = x[np.argsort(-x[:, 4])[:8400]]
+        scores = x[:, 4]
+        x[..., :4] = convert_to_ymin_xmin_ymax_xmax_format(x[..., :4], BoxFormat.XC_YC_W_H)
+        offset = x[:, 5] * 640
+        boxes = x[..., :4] + np.expand_dims(offset, 1)
+
+        # Original post-processing part
+        valid_indexs = nms(boxes, scores, iou_thres=iou_thres, max_out_dets=max_out_dets)
+        x = x[valid_indexs]
+        nms_classes = x[:, 5]
+        nms_bbox = x[:, :4]
+        nms_scores = x[:, 4]
+
+        nms_results.append((nms_bbox, nms_scores, nms_classes))
+
+    return nms_results
+
+
+def combined_nms_seg(batch_boxes, batch_scores, batch_masks, iou_thres: float = 0.5, conf: float = 0.001,
+                     max_out_dets: int = 300):
+    nms_results = []
+    for boxes, scores, masks in zip(batch_boxes, batch_scores, batch_masks):
+        # Compute maximum scores and corresponding class indices
+        class_indices = np.argmax(scores, axis=1)
+        max_scores = np.amax(scores, axis=1)
+        detections = np.concatenate([boxes, np.expand_dims(max_scores, axis=1), np.expand_dims(class_indices, axis=1)],
+                                    axis=1)
+
+        # Swap the position of the two dimensions (32, 8400) to (8400, 32)
+        masks = np.transpose(masks, (1, 0))
+        # Filter out detections below the confidence threshold
+        valid_detections = max_scores > conf
+
+        # Nothing cleared the confidence threshold
+        if not np.any(valid_detections):
+            nms_results.append((np.ndarray(0), np.ndarray(0), np.ndarray(0), np.ndarray(0)))
+        else:
+            detections = detections[valid_detections]
+            masks = masks[valid_detections]
+
+            # Sort detections by score in descending order
+            sorted_indices = np.argsort(-detections[:, 4])
+            detections = detections[sorted_indices]
+            masks = masks[sorted_indices]
+
+            detections[..., :4] = convert_to_ymin_xmin_ymax_xmax_format(detections[..., :4], BoxFormat.XC_YC_W_H)
+
+            # Perform class-wise NMS
+            unique_classes = np.unique(detections[:, 5])
+            final_indices = []
+
+            for cls in unique_classes:
+                cls_indices = np.where(detections[:, 5] == cls)[0]
+                cls_boxes = detections[cls_indices, :4]
+                cls_scores = detections[cls_indices, 4]
+                cls_valid_indices = nms(cls_boxes, cls_scores, iou_thres=iou_thres, max_out_dets=max_out_dets)
+                final_indices.extend(cls_indices[cls_valid_indices])
+
+            final_indices = np.array(final_indices)
+            final_detections = detections[final_indices]
+            final_masks = masks[final_indices]
+
+            # Extract class indices, bounding boxes, and scores
+            nms_classes = final_detections[:, 5]
+            nms_bbox = final_detections[:, :4]
+            nms_scores = final_detections[:, 4]
+
+            # Append results including masks
+            nms_results.append((nms_bbox, nms_scores, nms_classes, final_masks))
+    return nms_results
+
+
+class 
BoxFormat(Enum):
+    YMIM_XMIN_YMAX_XMAX = 'ymin_xmin_ymax_xmax'
+    XMIM_YMIN_XMAX_YMAX = 'xmin_ymin_xmax_ymax'
+    XMIN_YMIN_W_H = 'xmin_ymin_width_height'
+    XC_YC_W_H = 'xc_yc_width_height'
+
+
+def convert_to_ymin_xmin_ymax_xmax_format(boxes, orig_format: BoxFormat):
+    """
+    Change boxes from one format to another (e.g. XMIN_YMIN_W_H --> YMIM_XMIN_YMAX_XMAX).
+
+    Boxes already in the target format are returned unchanged.
+
+    :param boxes: array of boxes, shape (N, 4)
+    :param orig_format: the BoxFormat the boxes are currently in
+    :return: boxes in YMIM_XMIN_YMAX_XMAX format
+    """
+    if len(boxes) == 0:
+        return boxes
+    elif orig_format == BoxFormat.YMIM_XMIN_YMAX_XMAX:
+        return boxes
+    elif orig_format == BoxFormat.XMIN_YMIN_W_H:
+        boxes[:, 2] += boxes[:, 0]  # convert width to xmax
+        boxes[:, 3] += boxes[:, 1]  # convert height to ymax
+        boxes[:, 0], boxes[:, 1] = boxes[:, 1], boxes[:, 0].copy()  # swap xmin, ymin columns
+        boxes[:, 2], boxes[:, 3] = boxes[:, 3], boxes[:, 2].copy()  # swap xmax, ymax columns
+        return boxes
+    elif orig_format == BoxFormat.XMIM_YMIN_XMAX_YMAX:
+        boxes[:, 0], boxes[:, 1] = boxes[:, 1], boxes[:, 0].copy()  # swap xmin, ymin columns
+        boxes[:, 2], boxes[:, 3] = boxes[:, 3], boxes[:, 2].copy()  # swap xmax, ymax columns
+        return boxes
+    elif orig_format == BoxFormat.XC_YC_W_H:
+        new_boxes = np.copy(boxes)
+        new_boxes[:, 0] = boxes[:, 1] - boxes[:, 3] / 2  # top left y
+        new_boxes[:, 1] = boxes[:, 0] - boxes[:, 2] / 2  # top left x
+        new_boxes[:, 2] = boxes[:, 1] + boxes[:, 3] / 2  # bottom right y
+        new_boxes[:, 3] = boxes[:, 0] + boxes[:, 2] / 2  # bottom right x
+        return new_boxes
+    else:
+        raise Exception("Unsupported boxes format")
+
+
+def clip_boxes(boxes: np.ndarray, h: int, w: int) -> np.ndarray:
+    """
+    Clip bounding boxes to stay within the image boundaries.
+
+    Args:
+        boxes (numpy.ndarray): Array of bounding boxes in format [y_min, x_min, y_max, x_max].
+        h (int): Height of the image.
+        w (int): Width of the image.
+
+    Returns:
+        numpy.ndarray: Clipped bounding boxes.
+    """
+    boxes[..., 0] = np.clip(boxes[..., 0], a_min=0, a_max=h)
+    boxes[..., 1] = np.clip(boxes[..., 1], a_min=0, a_max=w)
+    boxes[..., 2] = np.clip(boxes[..., 2], a_min=0, a_max=h)
+    boxes[..., 3] = np.clip(boxes[..., 3], a_min=0, a_max=w)
+    return boxes
+
+
+def scale_boxes(boxes: np.ndarray, h_image: int, w_image: int, h_model: int, w_model: int, preserve_aspect_ratio: bool,
+                normalized: bool = True) -> np.ndarray:
+    """
+    Scale and offset bounding boxes based on model output size and original image size.
+
+    Args:
+        boxes (numpy.ndarray): Array of bounding boxes in format [y_min, x_min, y_max, x_max].
+        h_image (int): Original image height.
+        w_image (int): Original image width.
+        h_model (int): Model output height.
+        w_model (int): Model output width.
+        preserve_aspect_ratio (bool): Whether to preserve image aspect ratio during scaling.
+        normalized (bool): Whether the input boxes are in normalised [0, 1] coordinates. Default is True.
+
+    Returns:
+        numpy.ndarray: Scaled and offset bounding boxes.
+    """
+    deltaH, deltaW = 0, 0
+    H, W = h_model, w_model
+    scale_H, scale_W = h_image / H, w_image / W
+
+    if preserve_aspect_ratio:
+        scale_H = scale_W = max(h_image / H, w_image / W)
+        H_tag = int(np.round(h_image / scale_H))
+        W_tag = int(np.round(w_image / scale_W))
+        deltaH, deltaW = int((H - H_tag) / 2), int((W - W_tag) / 2)
+
+    nh, nw = (H, W) if normalized else (1, 1)
+
+    # Scale and offset boxes
+    # [y_min, x_min, y_max, x_max].
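+    # When preserve_aspect_ratio is set the model input was letterboxed, so
+    # deltaH/deltaW are the pad offsets in model space and are subtracted before
+    # rescaling; nh/nw undo [0, 1] normalisation when the network emits
+    # normalised coordinates. Out-of-range results are clipped below.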
+    boxes[..., 0] = (boxes[..., 0] * nw - deltaW) * scale_W
+    boxes[..., 1] = (boxes[..., 1] * nh - deltaH) * scale_H
+    boxes[..., 2] = (boxes[..., 2] * nw - deltaW) * scale_W
+    boxes[..., 3] = (boxes[..., 3] * nh - deltaH) * scale_H
+
+    # Clip boxes
+    boxes = clip_boxes(boxes, h_image, w_image)
+
+    return boxes
+
+
+def scale_coords(kpts: np.ndarray, h_image: int, w_image: int, h_model: int, w_model: int,
+                 preserve_aspect_ratio: bool) -> np.ndarray:
+    """
+    Scale and offset keypoints based on model output size and original image size.
+
+    Args:
+        kpts (numpy.ndarray): Array of keypoints of shape [..., 17, 3] where the last dim is (x, y, visible).
+        h_image (int): Original image height.
+        w_image (int): Original image width.
+        h_model (int): Model output height.
+        w_model (int): Model output width.
+        preserve_aspect_ratio (bool): Whether to preserve image aspect ratio during scaling.
+
+    Returns:
+        numpy.ndarray: Scaled and offset keypoints.
+    """
+    deltaH, deltaW = 0, 0
+    H, W = h_model, w_model
+    scale_H, scale_W = h_image / H, w_image / W
+
+    if preserve_aspect_ratio:
+        scale_H = scale_W = max(h_image / H, w_image / W)
+        H_tag = int(np.round(h_image / scale_H))
+        W_tag = int(np.round(w_image / scale_W))
+        deltaH, deltaW = int((H - H_tag) / 2), int((W - W_tag) / 2)
+
+    # Scale and offset keypoints
+    kpts[..., 0] = (kpts[..., 0] - deltaH) * scale_H
+    kpts[..., 1] = (kpts[..., 1] - deltaW) * scale_W
+
+    # Clip keypoints
+    kpts = clip_coords(kpts, h_image, w_image)
+
+    return kpts
+
+
+def clip_coords(kpts: np.ndarray, h: int, w: int) -> np.ndarray:
+    """
+    Clip keypoints to stay within the image boundaries.
+
+    Args:
+        kpts (numpy.ndarray): Array of keypoints of shape [..., 17, 3] where the last dim is (x, y, visible).
+        h (int): Height of the image.
+        w (int): Width of the image.
+
+    Returns:
+        numpy.ndarray: Clipped keypoints.
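+
+    For example, with h = w = 160 a coordinate of -3 is snapped to 0 and a
+    coordinate of 200 is snapped to 160; the clipping is done in place on kpts.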
+ """ + kpts[..., 0] = np.clip(kpts[..., 0], a_min=0, a_max=h) + kpts[..., 1] = np.clip(kpts[..., 1], a_min=0, a_max=w) + return kpts + + +PARTS = { + 0: 'Nose', + 1: 'EyeL', + 2: 'EyeR', + 3: 'EarL', + 4: 'EarR', + 5: 'SholderL', + 6: 'SholderR', + 7: 'ElbowL', + 8: 'ElbowR', + 9: 'WristL', + 10: 'WristR', + 11: 'HipL', + 12: 'HipR', + 13: 'KneeL', + 14: 'KneeR', + 15: 'AnkleL', + 16: 'AnkleR' +} + + +class COCODrawer: + def __init__(self, categories, imx500, needs_rescale_coords=True): + self.categories = categories + self.imx500 = imx500 + self.needs_rescale_coords = needs_rescale_coords + + def get_coords(self, annotation, metadata: dict, picam2: Picamera2, stream): + if self.needs_rescale_coords: + obj_scaled = self.imx500.convert_inference_coords(annotation, metadata, picam2, stream) + x0 = obj_scaled.x + y0 = obj_scaled.y + x1 = x0 + obj_scaled.width + y1 = y0 + obj_scaled.height + else: + y0, x0, y1, x1 = annotation + y0 = max(0, y0) + x0 = max(0, x0) + return int(y0), int(x0), int(y1), int(x1) + + def draw_bounding_box(self, img, annotation, class_id, score, metadata: dict, picam2: Picamera2, stream): + y0, x0, y1, x1 = self.get_coords(annotation, metadata, picam2, stream) + text = f"{self.categories[int(class_id)]}:{score:.3f}" + cv2.rectangle(img, (x0, y0), (x1, y1), (0, 0, 255), 2) + cv2.putText(img, text, (x0, y0), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) + + def draw_keypoints(self, img, keypoints, min_confidence, metadata: dict, picam2: Picamera2, stream): + def get_point(index): + y0, x0 = keypoints[index][1], keypoints[index][0] + y0, x0, _, _ = self.get_coords((y0, x0, y0 + 1, x0 + 1), metadata, picam2, stream) + return x0, y0 + + skeleton = [ + [0, 1], [0, 2], [1, 3], [2, 4], # Head + [5, 6], [5, 7], [7, 9], [6, 8], # Arms + [8, 10], [5, 11], [6, 12], [11, 12], # Body + [11, 13], [12, 14], [13, 15], [14, 16] # Legs + ] + + # Draw skeleton lines + for connection in skeleton: + start_point = get_point(connection[0]) + end_point = get_point(connection[1]) + start_confidence = keypoints[connection[0]][2] + end_confidence = keypoints[connection[1]][2] + if start_confidence < min_confidence or end_confidence < min_confidence: + continue + cv2.line(img, start_point, end_point, (255, 0, 0), 2) + + # Draw keypoints as colored circles + for i in range(len(keypoints)): + x, y = get_point(i) + confidence = keypoints[i][2] + if confidence < min_confidence: + continue + cv2.circle(img, (x, y), 3, (0, 255, 0), -1) + label = f"{PARTS.get(i)}.{confidence:.3f}" + cv2.putText(img, label, (x + 5, y + 15), cv2.FONT_HERSHEY_SIMPLEX, 0.25, (0, 255, 0), 1) + + def annotate_image(self, img, b, s, c, k, box_min_conf, kps_min_conf, metadata: dict, picam2: Picamera2, stream): + for index, row in enumerate(b): + if s[index] >= box_min_conf: + self.draw_bounding_box(img, row, c[index], s[index], metadata, picam2, stream) + if k is not None: + self.draw_keypoints(img, k[index], kps_min_conf, metadata, picam2, stream) + + def overlay_masks(self, picam2, masks, scores, colors, score_threshold=0.55, mask_threshold=0.5): + overlay = np.zeros((masks.shape[1], masks.shape[2], 4), dtype=np.uint8) + for idx, (mask, score) in enumerate(zip(masks, scores)): + if score > score_threshold: # Check if the score is above the threshold + binary_mask = (mask > mask_threshold).astype(np.uint8) + color = np.array(colors[idx][:3]) * 255 # Convert color to 0-255 scale + overlay[binary_mask == 1, :3] = color + overlay[binary_mask == 1, 3] = 127 # opacity + picam2.set_overlay(overlay) + + +def softmax(x): + y = 
np.exp(x - np.expand_dims(np.max(x, axis=-1), axis=-1)) + z = y / np.expand_dims(np.sum(y, axis=-1), axis=-1) + return z + + +def crop_mask(masks, boxes): + """ + It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box + + Args: + masks (numpy.ndarray): [h, w, n] tensor of masks + boxes (numpy.ndarray): [n, 4] tensor of bbox coordinates in relative point form + + Returns: + (numpy.ndarray): The masks are being cropped to the bounding box. + """ + n, w, h = masks.shape + x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1) + c = np.arange(h, dtype=np.float32)[None, None, :] # rows shape(1,w,1) + r = np.arange(w, dtype=np.float32)[None, :, None] # cols shape(h,1,1) + + return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) diff --git a/picamera2/devices/imx500/postprocess_efficientdet_lite0.py b/picamera2/devices/imx500/postprocess_efficientdet_lite0.py new file mode 100644 index 00000000..3ffd7fd4 --- /dev/null +++ b/picamera2/devices/imx500/postprocess_efficientdet_lite0.py @@ -0,0 +1,213 @@ +""" +Efficientdet postprocessing + +This code is based on: +https://github.com/google/automl/tree/master/efficientdet +""" + +from typing import Tuple + +import numpy as np + +from picamera2.devices.imx500.postprocess import ( + BoxFormat, convert_to_ymin_xmin_ymax_xmax_format, nms) +from picamera2.devices.imx500.postprocess_yolov5 import coco80_to_coco91 + +default_box_variance = [1.0, 1.0, 1.0, 1.0] +default_aspect_ratios = [1.0, 2.0, 0.5] + + +def postprocess_efficientdet_lite0_detection(outputs: Tuple[np.ndarray, np.ndarray, np.ndarray], + anchor_scale=3, + min_level=3, + max_level=7, + box_variance=default_box_variance, + model_input_shape=(320, 320), + min_wh=2, + max_wh=7680, + conf_thres: float = 0.001, + iou_thres: float = 0.65, + max_nms_dets: int = 5000, + max_out_dets: int = 1000): + H, W = model_input_shape + ############################################################ + # Box decoding + ############################################################ + outputs_decoded = box_decoding_edetlite(output_annotations=outputs, + H=H, + W=W, + anchor_scale=anchor_scale, + min_level=min_level, + max_level=max_level, + box_variance=box_variance) + + classes = outputs[0] + num_categories = classes.shape[-1] + + ############################################################ + # Post processing for each input image + ############################################################ + # Note: outputs_decoded shape is [Batch,num_anchors*Detections,(4+1+num_categories)] + post_processed_outputs = [] + for _, x in enumerate(outputs_decoded): + # ---------------------------------------- + # Filter by score and width-height + # ---------------------------------------- + scores = x[..., 4] + wh = x[..., 2:4] + valid_indexs = (scores > conf_thres) & ((wh > min_wh).any(1)) & ((wh < max_wh).any(1)) + x = x[valid_indexs] + + # ---------------------------------------- + # Taking Best class only + # ---------------------------------------- + x[..., 5:] *= x[..., 4:5] # compute confidence per class (class_score * object_score) + conf = np.max(x[:, 5:], axis=1, keepdims=True) + classes_id = np.argmax(x[:, 5:], axis=1, keepdims=True) + + # Change boxes format from [x_c,y_c,w,h] to [y_min,x_min,y_max,x_max] + boxes = convert_to_ymin_xmin_ymax_xmax_format(x[..., :4], BoxFormat.XC_YC_W_H) + x = np.concatenate((boxes, conf, classes_id), axis=1)[conf.reshape(-1) > conf_thres] + + # --------------------------- # + # NMS + # --------------------------- # + x = x[np.argsort(-x[:, 
4])[:max_nms_dets]] # sort by confidence from high to low + offset = x[..., 5:6] * np.maximum(H, W) + boxes_offset, scores = x[..., :4] + offset, x[..., 4] # boxes with offset by class + valid_indexs = nms(dets=boxes_offset, scores=scores, iou_thres=iou_thres, max_out_dets=max_out_dets) + x = x[valid_indexs] + + boxes = x[..., :4] + + # --------------------------- # + # Classes process + # --------------------------- # + # convert classes from coco80 to coco91 to match labels + classes = coco80_to_coco91(x[..., 5]) if num_categories == 80 else x[..., 5] + classes -= 0 + + # --------------------------- # + # Scores + # --------------------------- # + scores = x[..., 4] + + # Add result + post_processed_outputs.append({'boxes': boxes, 'classes': classes, 'scores': scores}) + + return post_processed_outputs[0]['boxes'], post_processed_outputs[0]['scores'], post_processed_outputs[0]['classes'] + + +def box_decoding_edetlite(output_annotations, + H=320, + W=320, + anchor_scale=3, + min_level=3, + max_level=7, + box_variance=default_box_variance): + # ----------------------------------------------- + # EfficientDetLite detection post processing + # ----------------------------------------------- + # Note: 'output_annotations' is expected to be a list of 2 feature maps with shapes: + # [0] : [Batch,Detections,num_categories] + # [1] : [Batch,Detections,4] + classes = output_annotations[0] + boxes = output_annotations[1] + classes = 1 / (1 + np.exp(-classes)) # sigmoid + scores = np.ones((*boxes.shape[:-1], 1)) # Add default object scores of 1.0 + + # Combine tensors + outputs = np.concatenate((boxes, scores, classes), axis=2) + + # Box decoding + # Anchor boxes format: [y_min, x_min, y_max, x_max] normalized + + # Extract feature map sizes + strides = [2 ** i for i in range(max_level + 1)] + featmap_sizes = [(np.ceil(H / stride), np.ceil(W / stride)) for stride in strides] + + # Generate priors + batch_size = outputs.shape[0] + anchors = generate_anchors_EDETLITE(batch_size=batch_size, + featmap_sizes=featmap_sizes, + H=H, + W=W, + anchor_scale=anchor_scale, + min_level=min_level, + max_level=max_level) + + # Decode bboxes + y_c_anchors = (anchors[..., 0:1] + anchors[..., 2:3]) / 2 + x_c_anchors = (anchors[..., 1:2] + anchors[..., 3:4]) / 2 + ha = anchors[..., 2:3] - anchors[..., 0:1] + wa = anchors[..., 3:4] - anchors[..., 1:2] + + # Output Box format: [x_c, y_c, w, h] + pred_boxes = outputs[..., :4] + y_c = pred_boxes[..., 0:1] * box_variance[0] * ha + y_c_anchors + x_c = pred_boxes[..., 1:2] * box_variance[1] * wa + x_c_anchors + h = np.exp(pred_boxes[..., 2:3] * box_variance[2]) * ha + w = np.exp(pred_boxes[..., 3:4] * box_variance[3]) * wa + outputs[..., 0:1] = x_c + outputs[..., 1:2] = y_c + outputs[..., 2:3] = w + outputs[..., 3:4] = h + return outputs + + +def generate_anchors_EDETLITE(batch_size, + featmap_sizes, + H=320, + W=320, + anchor_scale=3, + min_level=3, + max_level=7, + aspect_ratios=default_aspect_ratios): + """Generate configurations of anchor boxes.""" + anchor_scales = [anchor_scale] * (max_level - min_level + 1) + num_scales = len(aspect_ratios) + anchor_configs = {} + for level in range(min_level, max_level + 1): + anchor_configs[level] = [] + for scale_octave in range(num_scales): + for aspect in aspect_ratios: + anchor_configs[level].append( + ((featmap_sizes[0][0] / float(featmap_sizes[level][0]), + featmap_sizes[0][1] / float(featmap_sizes[level][1])), + scale_octave / float(num_scales), aspect, + anchor_scales[level - min_level])) + + """Generates multiscale anchor 
boxes.""" + boxes_all = [] + for _, configs in anchor_configs.items(): + boxes_level = [] + for config in configs: + stride, octave_scale, aspect, anchor_scale = config + base_anchor_size_x = anchor_scale * stride[1] * 2 ** octave_scale + base_anchor_size_y = anchor_scale * stride[0] * 2 ** octave_scale + if isinstance(aspect, list): + aspect_x, aspect_y = aspect + else: + aspect_x = np.sqrt(aspect) + aspect_y = 1.0 / aspect_x + anchor_size_x_2 = base_anchor_size_x * aspect_x / 2.0 + anchor_size_y_2 = base_anchor_size_y * aspect_y / 2.0 + + x = np.arange(stride[1] / 2, W, stride[1]) + y = np.arange(stride[0] / 2, H, stride[0]) + xv, yv = np.meshgrid(x, y) + xv = xv.reshape(-1) + yv = yv.reshape(-1) + + boxes = np.vstack((yv - anchor_size_y_2, xv - anchor_size_x_2, + yv + anchor_size_y_2, xv + anchor_size_x_2)) + boxes = np.swapaxes(boxes, 0, 1) + boxes_level.append(np.expand_dims(boxes, axis=1)) + + # concat anchors on the same level to the shape Batch x Detections x 4 + boxes_level = np.concatenate(boxes_level, axis=1).reshape([1, -1, 4]) + boxes_level = np.repeat(boxes_level, batch_size, axis=0) + boxes_all.append(boxes_level) + + anchor_boxes = np.concatenate(boxes_all, axis=1) + return anchor_boxes diff --git a/picamera2/devices/imx500/postprocess_highernet.py b/picamera2/devices/imx500/postprocess_highernet.py new file mode 100644 index 00000000..6cd97539 --- /dev/null +++ b/picamera2/devices/imx500/postprocess_highernet.py @@ -0,0 +1,562 @@ +""" +Highernet postprocessing + +This code is based on multiple sources: +https://github.com/HRNet/HigherHRNet-Human-Pose-Estimation +https://github.com/princeton-vl/pose-ae-train +https://github.com/yinguobing/facial-landmark-detection-hrnet +""" + +from typing import Tuple + +import cv2 +import numpy as np + +try: + from munkres import Munkres +except ImportError: + raise ImportError("Please install munkres first. 
`pip3 install --break-system-packages munkres`") + +default_joint_order = [0, 1, 2, 3, 4, 5, 6, 11, 12, 7, 8, 9, 10, 13, 14, 15, 16] + + +def postprocess_higherhrnet(outputs: list[np.ndarray, np.ndarray], + img_size, + img_w_pad, + img_h_pad, + network_postprocess, + num_joints=17, + tag_per_joint=True, + joint_order=default_joint_order, + detection_threshold=0.3, + max_num_people=30, + nms_kernel=5, + nms_padding=2, + ignore_too_much=False, + use_detection_val=True, + tag_threshold=1.0, + adjust=False, + refine=False, + input_image_size=(288, 384), + output_shape=(144, 192)) -> Tuple[list[list], list, list[list]]: + all_preds = [] + all_scores = [] + if network_postprocess: + # outputs [[B, max_num_people, num_joints], [B, max_num_people, num_joints], [B, max_num_people, num_joints]] + grouped, scores = parse(network_outputs=[outputs[0][0, ...], + outputs[1][0, ...], + outputs[2][0, ...]], + output_shape=output_shape, + adjust=adjust, + refine=refine, + network_postprocess=network_postprocess, + tag_per_joint=tag_per_joint, + max_num_people=max_num_people, + nms_kernel=nms_kernel, + nms_padding=nms_padding, + num_joints=num_joints, + joint_order=joint_order, + detection_threshold=detection_threshold, + ignore_too_much=ignore_too_much, + use_detection_val=use_detection_val, + tag_threshold=tag_threshold) + else: + out0 = outputs[0][0] + out1 = outputs[1][0] + + # postprocess: + # resize first output to 2nd output size + out0 = ResizeBilinear(out0, out1.shape[0], out1.shape[1]) + # average heatmaps from both outputs + heatmaps = (out0[..., :17] + out1) / 2 + tags = out0[..., 17:] + grouped, scores = parse(network_outputs=[heatmaps, tags], + output_shape=output_shape, + adjust=adjust, + refine=refine, + network_postprocess=network_postprocess, + tag_per_joint=tag_per_joint, + max_num_people=max_num_people, + nms_kernel=nms_kernel, + nms_padding=nms_padding, + num_joints=num_joints, + joint_order=joint_order, + detection_threshold=detection_threshold, + ignore_too_much=ignore_too_much, + use_detection_val=use_detection_val, + tag_threshold=tag_threshold) + + # scale keypoints coordinates to input image size + scale_factor = (np.array(input_image_size) / output_shape).reshape((1, 1, 2)) + for img_index in range(len(grouped)): + if grouped[img_index].shape[0] > 0: + # rescale to preprocessed input image size + grouped[img_index][:, :, :2] = grouped[img_index][:, :, :2] * scale_factor + # remove pad offset: + grouped[img_index][:, :, 0] = grouped[img_index][:, :, 0] - img_w_pad[0] + grouped[img_index][:, :, 1] = grouped[img_index][:, :, 1] - img_h_pad[0] + # rescale to original image size + resized_input_image = np.array(input_image_size) - np.array( + (sum(img_h_pad), + sum(img_w_pad))) + s = (np.array(img_size) / resized_input_image).reshape((1, 1, 2)) + grouped[img_index][:, :, :2] = grouped[img_index][:, :, :2] * s + + # Calculate zero keypoint + zero_kpt = np.zeros((1, 4)) + resized_input_image = np.array(input_image_size) - np.array( + (sum(img_h_pad), + sum(img_w_pad))) + s = (np.array(img_size) / resized_input_image).reshape((1, 1, 2)) + zero_kpt[:, 0] = zero_kpt[:, 0] - img_w_pad[0] + zero_kpt[:, 1] = zero_kpt[:, 1] - img_h_pad[0] + zero_kpt[:, :2] = zero_kpt[:, :2] * s + + all_preds.append(grouped) + all_scores.append(scores) + + kpts = [] + # one image, one iter + for idx, _kpts in enumerate(all_preds): + for idx_kpt, kpt in enumerate(_kpts[0]): + area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * (np.max(kpt[:, 1]) - np.min(kpt[:, 1])) + # kpt [17, 4] + kpt = processKeypoints(kpt) + 
kpts.append( + { + 'keypoints': kpt[:, 0:3], + 'score': all_scores[idx][idx_kpt], + 'tags': kpt[:, 3], + 'area': area + } + ) + # _coco_keypoint_results_one_category_kernel + out_keypoints = [] + out_scores = [] + out_bbox = [] + + # for img_kpts in kpts: + img_kpts = kpts + if len(img_kpts) == 0: + return [], [], [] + + _key_points = np.array( + [img_kpts[k]['keypoints'] for k in range(len(img_kpts))] + ) + key_points = np.zeros( + (_key_points.shape[0], num_joints * 3), + dtype=np.float32 + ) + + for ipt in range(num_joints): + key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0] + key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1] + key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2] # keypoints score. + + for k in range(len(img_kpts)): + kpt = key_points[k].reshape((num_joints, 3)) + # ignore zero kpts + mask = np.isin(kpt, zero_kpt) + kpt = np.where(mask, np.nan, kpt) + left_top = np.nanmin(kpt, axis=0) + right_bottom = np.nanmax(kpt, axis=0) + + out_keypoints.append(list(key_points[k])) + out_scores.append(img_kpts[k]['score']) + out_bbox.append([left_top[1], left_top[0], right_bottom[1], right_bottom[0]]) + return out_keypoints, out_scores, out_bbox + + +def parse(network_outputs, + output_shape, + adjust=False, + refine=False, + network_postprocess=False, + tag_per_joint=17, + max_num_people=30, + nms_kernel=5, + nms_padding=2, + num_joints=17, + joint_order=default_joint_order, + detection_threshold=0.1, + ignore_too_much=False, + use_detection_val=True, + tag_threshold=1.0 + ): + if network_postprocess: + tag_k, ind_k, val_k = network_outputs + x = ind_k % output_shape[1] + y = (ind_k / output_shape[1]).astype(ind_k.dtype) + ind_k = np.stack([x, y], axis=2) + + topk_output_dict = {'tag_k': tag_k[np.newaxis, ...], + 'loc_k': ind_k[np.newaxis, ...], + 'val_k': val_k[np.newaxis, ...], + } + else: + det, tag = network_outputs + # topk_output_dict + # {'tag_k': [num_images, max_num_people, num_joints], + # 'loc_k': [num_images, max_num_people, num_joints, 2], + # 'val_k': [num_images, max_num_people, num_joints]} + topk_output_dict = top_k(det=det, + tag=tag, + tag_per_joint=tag_per_joint, + max_num_people=max_num_people, + nms_kernel=nms_kernel, + nms_padding=nms_padding) + # ans [num_joints_detected, num_joints, 4] + ans = match(tag_k=topk_output_dict['tag_k'], + loc_k=topk_output_dict['loc_k'], + val_k=topk_output_dict['val_k'], + num_joints=num_joints, + joint_order=joint_order, + detection_threshold=detection_threshold, + max_num_people=max_num_people, + ignore_too_much=ignore_too_much, + use_detection_val=use_detection_val, + tag_threshold=tag_threshold) + if adjust: + # ans [[num_joints_detected, num_joints, 4]] + ans = adjust_func(ans, det[np.newaxis, ...]) # TODO support batch size > 1 + + scores = [i[:, 2].mean() for i in ans[0]] + + if refine: + ans = ans[0] + # for every detected person + for _ in range(len(ans)): + # NotImplemented + if not tag_per_joint: + raise NotImplementedError + + # ans [[num_joints_detected, num_joints, 4]] + ans = [ans] + return ans, scores + + +def ResizeBilinear(img, new_height, new_width): + return cv2.resize(img, (new_width, new_height)) + + +def top_k(det, + tag, + tag_per_joint=17, + max_num_people=30, + nms_kernel=5, + nms_padding=2): + # det [144, 192, 17] + # tag [144, 192, 17] + + # det [144, 192, 17] + det = nms(det, + nms_kernel=nms_kernel, + nms_padding=nms_padding) + # num_images 1 + # h 144 + # w 192 + # num_joints 17 + num_images, h, w, num_joints = (1,) + det.shape # TODO: support multiple images (batch>1) + + # det [num_images, 
h*w, num_joints] + det = det.reshape((num_images, -1, num_joints)) + # val_k [num_images, max_num_people, num_joints] + val_k, ind = np_topk(det, max_num_people) + + # tag [num_images, h*w, num_joints] + tag = tag.reshape((num_images, -1, num_joints)) + + # NotImplemented + if not tag_per_joint: + raise NotImplementedError + tag = tag.expand(-1, num_joints, -1, -1) + + # tag_k [num_images, max_num_people, num_joints] + tag_k = np.zeros((num_images, max_num_people, num_joints)) + for img in range(num_images): + for kp in range(num_joints): + tag_k[img, :, kp] = tag[img, ind[img, :, kp], kp] + + x = ind % w + y = (ind / w).astype(ind.dtype) + + # ind_k [num_images, max_num_people, num_joints, 2] + ind_k = np.stack([x, y], axis=3) + + # {'tag_k': [num_images, max_num_people, num_joints], + # 'loc_k': [num_images, max_num_people, num_joints, 2], + # 'val_k': [num_images, max_num_people, num_joints]} + return {'tag_k': tag_k, + 'loc_k': ind_k, + 'val_k': val_k, + } + + +def nms(det, + nms_kernel=5, + nms_padding=2): + # det [144, 192, 17] + # maxm [144, 192, 17] + maxm = np_max_pool(det, k=nms_kernel, p=nms_padding) + maxm = np.equal(maxm, det).astype(np.float32) + det = det * maxm + return det + + +def np_max_pool(x, + k=5, + p=2, + p_value=0): + # x [144, 192, 17] + # k - kernel size (h, w) + # p - padding size (top, bottom, left, right) + if isinstance(k, int): + k = (k, k) + if isinstance(p, int): + p = ((p, p), (p, p), (0, 0)) + elif isinstance(p, (list, tuple)) and len(p) == 2: + p = ((p[0], p[0]), (p[1], p[1]), (0, 0)) + + # y [148, 196, 17 + y = np.pad(x, p) + out = np.concatenate( + [np.max(np.concatenate([y[ky:ky + y.shape[0] - k[0] + 1, kx:kx + y.shape[1] - k[1] + 1, c:c + 1] + for ky in range(k[0]) + for kx in range(k[1])], 2), axis=2, keepdims=True) for c in range(y.shape[2])], 2) + # out [144, 192, 17] + return out + + +def np_topk(x, k): + # x [1, 27648, 17] + # n_images 1 + # n_keypoints 17 + n_images, _, n_keypoints = x.shape + # vals [1, k, 17] + # inds [1, k, 17] + vals = np.zeros((n_images, k, n_keypoints), dtype=x.dtype) + inds = np.zeros((n_images, k, n_keypoints), dtype=np.int64) + for img in range(n_images): + for kp in range(n_keypoints): + # _inds [k] + _inds = np.argpartition(x[img, :, kp], -k)[-k:] + _inds = _inds[np.argsort(x[img, _inds, kp], )][::-1] + inds[img, :, kp] = _inds + vals[img, :, kp] = x[img, _inds, kp] + return vals, inds + + +def match(tag_k, + loc_k, + val_k, + num_joints=17, + joint_order=default_joint_order, + detection_threshold=0.1, + max_num_people=30, + ignore_too_much=False, + use_detection_val=True, + tag_threshold=1.0): + def m(x): + return match_by_tag(inp=x, + num_joints=num_joints, + joint_order=joint_order, + detection_threshold=detection_threshold, + max_num_people=max_num_people, + ignore_too_much=ignore_too_much, + use_detection_val=use_detection_val, + tag_threshold=tag_threshold) + return list(map(m, zip(tag_k, loc_k, val_k))) + + +def match_by_tag(inp, + num_joints=17, + joint_order=default_joint_order, + detection_threshold=0.1, + max_num_people=30, + ignore_too_much=False, + use_detection_val=True, + tag_threshold=1.0): + # tag_k [num_images, max_num_people, num_joints] + # loc_k [num_images, max_num_people, num_joints, 2] + # val_k [num_images, max_num_people, num_joints] + tag_k, loc_k, val_k = inp + # default_ [num_joints, 4] + default_ = np.zeros((num_joints, 3 + 1)) # tag_k.shape[2] assumed to be 1 # pytorch shape: (17, 4) + + joint_dict = {} + tag_dict = {} + for i in range(num_joints): + idx = joint_order[i] + + # tags 
[max_num_people, 1] + tags = tag_k[:, idx:idx + 1] + # joints [max_num_people, 4] + joints = np.concatenate((loc_k[:, idx, :], val_k[:, idx:idx + 1], tags), 1) + # mask [max_num_people] + mask = joints[:, 2] > detection_threshold + tags = tags[mask] + joints = joints[mask] + + if joints.shape[0] == 0: + continue + + if i == 0 or len(joint_dict) == 0: + for tag, joint in zip(tags, joints): + key = tag[0] + joint_dict.setdefault(key, np.copy(default_))[idx] = joint + tag_dict[key] = [tag] + else: + grouped_keys = list(joint_dict.keys())[:max_num_people] + grouped_tags = [np.mean(tag_dict[i], axis=0) for i in grouped_keys] + + if ignore_too_much \ + and len(grouped_keys) == max_num_people: + continue + + diff = joints[:, None, 3:] - np.array(grouped_tags)[None, :, :] + diff_normed = np.linalg.norm(diff, ord=2, axis=2) + diff_saved = np.copy(diff_normed) + + if use_detection_val: + diff_normed = np.round(diff_normed) * 100 - joints[:, 2:3] + + num_added = diff.shape[0] + num_grouped = diff.shape[1] + + if num_added > num_grouped: + diff_normed = np.concatenate( + ( + diff_normed, + np.zeros((num_added, num_added - num_grouped)) + 1e10 + ), + axis=1 + ) + + pairs = py_max_match(diff_normed) + for row, col in pairs: + if ( + row < num_added + and col < num_grouped + and diff_saved[row][col] < tag_threshold + ): + key = grouped_keys[col] + joint_dict[key][idx] = joints[row] + tag_dict[key].append(tags[row]) + else: + key = tags[row][0] + joint_dict.setdefault(key, np.copy(default_))[idx] = \ + joints[row] + tag_dict[key] = [tags[row]] + + # ans [len(joint_dict), num_joints, 4] + ans = np.array([joint_dict[i] for i in joint_dict]).astype(np.float32) + return ans + + +def py_max_match(scores): + m = Munkres() + tmp = m.compute(scores) + tmp = np.array(tmp).astype(np.int32) + return tmp + + +def adjust_func(ans, det): + # ans [[num_joints_detected, num_joints, 4]] + # det [144, 192, 17] + for batch_id, people in enumerate(ans): + for people_id, i in enumerate(people): + for joint_id, joint in enumerate(i): + if joint[2] > 0: + y, x = joint[0:2] + xx, yy = int(x), int(y) + # print(batch_id, joint_id, det[batch_id].shape) + tmp = det[batch_id][..., joint_id] + if tmp[xx, min(yy + 1, tmp.shape[1] - 1)] > tmp[xx, max(yy - 1, 0)]: + y += 0.25 + else: + y -= 0.25 + + if tmp[min(xx + 1, tmp.shape[0] - 1), yy] > tmp[max(0, xx - 1), yy]: + x += 0.25 + else: + x -= 0.25 + ans[batch_id][people_id, joint_id, 0:2] = (y + 0.5, x + 0.5) + # ans [[num_joints_detected, num_joints, 4]] + return ans + + +def refine_func(det, tag, keypoints): + # det [144, 192, 17] + # tag [144, 192, 17] + # keypoints [num_joints, 4] + if len(tag.shape) == 3: + # tag shape: (17, 128, 128, 1) + # tag [144, 192, 17, 1] + tag = tag[:, :, :, None] + + tags = [] + for i in range(keypoints.shape[0]): + if keypoints[i, 2] > 0: + # save tag value of detected keypoint + x, y = keypoints[i][:2].astype(np.int32) + tags.append(tag[y, x, i]) + + # mean tag of current detected people + prev_tag = np.mean(tags, axis=0) + ans = [] + + for i in range(keypoints.shape[0]): + # score of joints i at all position + tmp = det[:, :, i] + # distance of all tag values with mean tag of current detected people + tt = (((tag[:, :, i] - prev_tag[None, None, :]) ** 2).sum(axis=2) ** 0.5) + tmp2 = tmp - np.round(tt) + + # find maximum position + y, x = np.unravel_index(np.argmax(tmp2), tmp.shape) + xx = x + yy = y + # detection score at maximum position + val = tmp[y, x] + # offset by 0.5 + x += 0.5 + y += 0.5 + + # add a quarter offset + if tmp[yy, min(xx + 1, 
tmp.shape[1] - 1)] > tmp[yy, max(xx - 1, 0)]: + x += 0.25 + else: + x -= 0.25 + + if tmp[min(yy + 1, tmp.shape[0] - 1), xx] > tmp[max(0, yy - 1), xx]: + y += 0.25 + else: + y -= 0.25 + + ans.append((x, y, val)) + ans = np.array(ans) + + if ans is not None: + for i in range(det.shape[2]): + # add keypoint if it is not detected + if ans[i, 2] > 0 and keypoints[i, 2] == 0: + # if ans[i, 2] > 0.01 and keypoints[i, 2] == 0: + keypoints[i, :2] = ans[i, :2] + keypoints[i, 2] = ans[i, 2] + # keypoints [num_joints_detected, num_joints, 4] + return keypoints + + +def processKeypoints(keypoints): + # keypoints [17, 4] + tmp = keypoints.copy() + if keypoints[:, 2].max() > 0: + num_keypoints = keypoints.shape[0] + for i in range(num_keypoints): + tmp[i][0:3] = [ + float(keypoints[i][0]), + float(keypoints[i][1]), + float(keypoints[i][2]) + ] + + return tmp diff --git a/picamera2/devices/imx500/postprocess_nanodet.py b/picamera2/devices/imx500/postprocess_nanodet.py new file mode 100644 index 00000000..b7f1d0b8 --- /dev/null +++ b/picamera2/devices/imx500/postprocess_nanodet.py @@ -0,0 +1,63 @@ +""" +Nanodet postprocessing + +This code is based on: +https://github.com/RangiLyu/nanodet +""" + +from typing import Tuple + +import numpy as np + +from picamera2.devices.imx500.postprocess import combined_nms, softmax + + +def postprocess_nanodet_detection(outputs, + conf: float = 0.0, + iou_thres: float = 0.65, + max_out_dets: int = 300) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + reg_max = 7 + num_categories = 80 + classes = outputs[..., :num_categories] + boxes = outputs[..., num_categories:] + classes = 1 / (1 + np.exp(-classes)) # sigmoid + + # Extract feature map sizes + strides = [8, 16, 32, 64] + featmap_sizes = [(np.ceil(416 / stride), np.ceil(416 / stride)) for stride in strides] + + # Generate priors + anchors = generate_anchors_NANODET(featmap_sizes, strides) + + # Decode bboxes + batch = boxes.shape[0] + x = np.reshape(boxes, newshape=(batch, -1, 4, reg_max + 1)) + x = softmax(x) + x = np.matmul(x, np.arange(0, reg_max + 1, 1, dtype=np.float32)) + x = np.reshape(x, newshape=(batch, -1, 4)) + distances = x * anchors[..., 2, None] + + # Output Box format: [x_c, y_c, w, h] + w = distances[..., 0:1] + distances[..., 2:3] + h = distances[..., 1:2] + distances[..., 3:4] + x_c = anchors[..., 0:1] - distances[..., 0:1] + w / 2 + y_c = anchors[..., 1:2] - distances[..., 1:2] + h / 2 + boxes = np.concatenate([x_c, y_c, w, h], axis=2) + + return combined_nms(boxes, classes, iou_thres, conf, max_out_dets) + + +def generate_anchors_NANODET(featmap_sizes, strides): + anchors_list = [] + for i, stride in enumerate(strides): + h, w = featmap_sizes[i] + x_range = np.arange(w) * stride + y_range = np.arange(h) * stride + y, x = np.meshgrid(y_range, x_range) + y = y.flatten() + x = x.flatten() + strides = np.ones_like(x) * stride + anchors = np.stack([y, x, strides, strides], axis=-1) + anchors = np.expand_dims(anchors, axis=0) + anchors_list.append(anchors) + return np.concatenate(anchors_list, axis=1) diff --git a/picamera2/devices/imx500/postprocess_yolov5.py b/picamera2/devices/imx500/postprocess_yolov5.py new file mode 100644 index 00000000..d9c5600f --- /dev/null +++ b/picamera2/devices/imx500/postprocess_yolov5.py @@ -0,0 +1,244 @@ +""" +Yolov5 postprocessing + +This code is based on: +https://github.com/ultralytics/ultralytics +""" +from typing import List + +import cv2 +import numpy as np + +from picamera2.devices.imx500.postprocess import ( + BoxFormat, convert_to_ymin_xmin_ymax_xmax_format, nms) 
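+
+# Example usage (a sketch for orientation only; `outputs` is assumed to be the
+# list of three YOLOv5 head tensors produced by the network, and the image and
+# model sizes below are illustrative):
+#
+#     boxes, scores, classes = postprocess_yolov5_detection(
+#         outputs, model_input_shape=(640, 640), conf_thres=0.3, iou_thres=0.65)
+#     boxes = scale_boxes(boxes, h_image=480, w_image=640, h_model=640,
+#                         w_model=640, preserve_aspect_ratio=True)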
+ +default_anchors = [[10, 13, 16, 30, 33, 23], + [30, 61, 62, 45, 59, 119], + [116, 90, 156, 198, 373, 326]] +default_strides = [8, 16, 32] + + +def postprocess_yolov5_detection(outputs: List[np.ndarray], + model_input_shape=(640, 640), + num_categories=80, + min_wh=2, + max_wh=7680, + conf_thres: float = 0.001, + iou_thres: float = 0.65, + max_nms_dets: int = 5000, + max_out_dets: int = 1000): + H, W = model_input_shape + ############################################################ + # Box decoding + ############################################################ + outputs_decoded = box_decoding_yolov5n(tensors=outputs, num_categories=num_categories, H=H, W=W) + + ############################################################ + # Post processing for each input image + ############################################################ + # Note: outputs_decoded shape is [Batch,num_anchors*Detections,(4+1+num_categories)] + post_processed_outputs = [] + for _, x in enumerate(outputs_decoded): + # ---------------------------------------- + # Filter by score and width-height + # ---------------------------------------- + scores = x[..., 4] + wh = x[..., 2:4] + valid_indexs = (scores > conf_thres) & ((wh > min_wh).any(1)) & ((wh < max_wh).any(1)) + x = x[valid_indexs] + + # ---------------------------------------- + # Taking Best class only + # ---------------------------------------- + x[..., 5:] *= x[..., 4:5] # compute confidence per class (class_score * object_score) + conf = np.max(x[:, 5:], axis=1, keepdims=True) + classes_id = np.argmax(x[:, 5:], axis=1, keepdims=True) + + # Change boxes format from [x_c,y_c,w,h] to [y_min,x_min,y_max,x_max] + boxes = convert_to_ymin_xmin_ymax_xmax_format(x[..., :4], BoxFormat.XC_YC_W_H) + x = np.concatenate((boxes, conf, classes_id), axis=1)[conf.reshape(-1) > conf_thres] + + # --------------------------- # + # NMS + # --------------------------- # + x = x[np.argsort(-x[:, 4])[:max_nms_dets]] # sort by confidence from high to low + offset = x[..., 5:6] * np.maximum(H, W) + boxes_offset, scores = x[..., :4] + offset, x[..., 4] # boxes with offset by class + valid_indexs = nms(dets=boxes_offset, scores=scores, iou_thres=iou_thres, max_out_dets=max_out_dets) + x = x[valid_indexs] + + boxes = x[..., :4] + # --------------------------- # + # Classes process + # --------------------------- # + # convert classes from coco80 to coco91 to match labels + classes = coco80_to_coco91(x[..., 5]) if num_categories == 80 else x[..., 5] + classes -= 1 + + # --------------------------- # + # Scores + # --------------------------- # + scores = x[..., 4] + + # Add result + post_processed_outputs.append({'boxes': boxes, 'classes': classes, 'scores': scores}) + + return post_processed_outputs[0]['boxes'], post_processed_outputs[0]['scores'], post_processed_outputs[0]['classes'] + + +def box_decoding_yolov5n(tensors, + num_categories=80, + H=640, + W=640, + anchors=default_anchors, + strides=default_strides): + # Tensors box format: [x_c, y_c, w, h] + no = num_categories + 5 # number of outputs per anchor + nl = len(anchors) # number of detection layers + na = len(anchors[0]) // 2 # number of anchors + anchor_grid = np.reshape(np.array(anchors), [nl, 1, -1, 1, 2]) + anchor_grid = anchor_grid.astype(np.float32) + z = [] + for i in range(nl): + ny, nx = H // strides[i], W // strides[i] + xv, yv = np.meshgrid(np.arange(nx), np.arange(ny)) + grid = np.reshape(np.stack([xv, yv], 2), [1, 1, ny * nx, 2]).astype(np.float32) + + y = tensors[i] + y = np.transpose(y, [0, 2, 1, 3]) + xy = (y[..., 
0:2] * 2 - 0.5 + grid) * strides[i] # xy + wh = (y[..., 2:4] * 2) ** 2 * anchor_grid[i] + + # Output box format: [x_c, y_c, w, h] + y = np.concatenate([xy, wh, y[..., 4:]], -1) + z.append(np.reshape(y, [-1, na * ny * nx, no])) + + return np.concatenate(z, 1) + + +# same as in preprocess but differs in h/w location +def scale_boxes(boxes: np.ndarray, h_image: int, w_image: int, h_model: int, w_model: int, + preserve_aspect_ratio: bool) -> np.ndarray: + """ + Scale and offset bounding boxes based on model output size and original image size. + + Args: + boxes (numpy.ndarray): Array of bounding boxes in format [y_min, x_min, y_max, x_max]. + h_image (int): Original image height. + w_image (int): Original image width. + h_model (int): Model output height. + w_model (int): Model output width. + preserve_aspect_ratio (bool): Whether to preserve image aspect ratio during scaling + + Returns: + numpy.ndarray: Scaled and offset bounding boxes. + """ + deltaH, deltaW = 0, 0 + H, W = h_model, w_model + scale_H, scale_W = h_image / H, w_image / W + + if preserve_aspect_ratio: + scale_H = scale_W = max(h_image / H, w_image / W) + H_tag = int(np.round(h_image / scale_H)) + W_tag = int(np.round(w_image / scale_W)) + deltaH, deltaW = int((H - H_tag) / 2), int((W - W_tag) / 2) + + # Scale and offset boxes + boxes[..., 0] = (boxes[..., 0] - deltaH) * scale_H + boxes[..., 1] = (boxes[..., 1] - deltaW) * scale_W + boxes[..., 2] = (boxes[..., 2] - deltaH) * scale_H + boxes[..., 3] = (boxes[..., 3] - deltaW) * scale_W + + # Clip boxes + boxes = clip_boxes(boxes, h_image, w_image) + + return boxes + + +# same as in preprocess but differs in h/w location +def clip_boxes(boxes: np.ndarray, h: int, w: int) -> np.ndarray: + """ + Clip bounding boxes to stay within the image boundaries. + + Args: + boxes (numpy.ndarray): Array of bounding boxes in format [y_min, x_min, y_max, x_max]. + h (int): Height of the image. + w (int): Width of the image. + + Returns: + numpy.ndarray: Clipped bounding boxes. 
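+
+    Example (a minimal sketch; the input array is illustrative):
+        >>> b = np.array([[-5., 10., 700., 650.]])
+        >>> clip_boxes(b, h=480, w=640)
+        array([[  0.,  10., 480., 640.]])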
+
+    """
+    boxes[..., 0] = np.clip(boxes[..., 0], a_min=0, a_max=h)
+    boxes[..., 1] = np.clip(boxes[..., 1], a_min=0, a_max=w)
+    boxes[..., 2] = np.clip(boxes[..., 2], a_min=0, a_max=h)
+    boxes[..., 3] = np.clip(boxes[..., 3], a_min=0, a_max=w)
+    return boxes
+
+
+def _normalize_coordinates(boxes, orig_width, orig_height, boxes_format):
+    """
+    Normalizes boxes given in original image coordinates to values between 0 and 1
+
+    :param boxes: bounding boxes in original image coordinates
+    :param orig_width: original image width
+    :param orig_height: original image height
+    :param boxes_format: if the boxes are in XMIN_YMIN_W_H or YMIN_XMIN_YMAX_XMAX format
+    :return: the normalized boxes
+    """
+    if len(boxes) == 0:
+        return boxes
+    elif _are_boxes_normalized(boxes):
+        return boxes
+    boxes[:, 0] = np.divide(boxes[:, 0], orig_height)
+    boxes[:, 1] = np.divide(boxes[:, 1], orig_width)
+    boxes[:, 2] = np.divide(boxes[:, 2], orig_height)
+    boxes[:, 3] = np.divide(boxes[:, 3], orig_width)
+    return boxes
+
+
+def _are_boxes_normalized(boxes):
+    if len(boxes) == 0:
+        return True  # it doesn't matter
+    if max(boxes[0]) > 1:
+        return False
+    return True
+
+
+def apply_normalization(boxes, orig_width, orig_height, boxes_format):
+    if _are_boxes_normalized(boxes):
+        return boxes
+    return _normalize_coordinates(boxes, orig_width, orig_height, boxes_format)
+
+
+# COCO 80-class to 91-class index conversion table
+def coco80_to_coco91(x):  # converts 80-index to 91-index
+    coco91Indexs = np.array(
+        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
+         35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+         63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90])
+
+    return coco91Indexs[x.astype(np.int32)]
+
+
+def yolov5n_preprocess(img):
+    # AspectPreservingResizeWithPad
+    new_height = 640
+    new_width = 640
+    pad_value = 114
+    resize_method = 3  # cv2.INTER_AREA
+    resize_ratio = max(img.shape[0] / new_height, img.shape[1] / new_width)
+    height_tag = int(np.round(img.shape[0] / resize_ratio))
+    width_tag = int(np.round(img.shape[1] / resize_ratio))
+    pad_values = ((int((new_height - height_tag) / 2), int((new_height - height_tag) / 2 + 0.5)),
+                  (int((new_width - width_tag) / 2), int((new_width - width_tag) / 2 + 0.5)),
+                  (0, 0))
+
+    resized_img = cv2.resize(img, (width_tag, height_tag), interpolation=resize_method)
+    padded_img = np.pad(resized_img, pad_values, constant_values=pad_value)
+
+    # Normalize
+    mean = 0
+    std = 255
+    normalized_img = (padded_img - mean) / std
+
+    return normalized_img
diff --git a/picamera2/devices/imx500/postprocess_yolov8.py b/picamera2/devices/imx500/postprocess_yolov8.py
new file mode 100644
index 00000000..91a1d63a
--- /dev/null
+++ b/picamera2/devices/imx500/postprocess_yolov8.py
@@ -0,0 +1,176 @@
+"""
+Yolov8 postprocessing
+
+This code is based on:
+https://github.com/ultralytics/ultralytics
+"""
+from typing import Tuple
+
+import cv2
+import numpy as np
+
+from picamera2.devices.imx500.postprocess import (
+    BoxFormat, combined_nms, combined_nms_seg,
+    convert_to_ymin_xmin_ymax_xmax_format, crop_mask, nms)
+
+
+def postprocess_yolov8_detection(outputs: Tuple[np.ndarray, np.ndarray],
+                                 conf: float = 0.3,
+                                 iou_thres: float = 0.7,
+                                 max_out_dets: int = 50) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Postprocess the outputs of a YOLOv8 model for object detection.
+
+    Args:
+        outputs (Tuple[np.ndarray, np.ndarray]): Tuple containing the model outputs for bounding boxes and class predictions.
+
+        conf (float, optional): Confidence threshold for bounding box predictions. Default is 0.3.
+        iou_thres (float, optional): IoU (Intersection over Union) threshold for Non-Maximum Suppression (NMS). Default is 0.7.
+        max_out_dets (int, optional): Maximum number of output detections to keep after NMS. Default is 50.
+
+    Returns:
+        Tuple[np.ndarray, np.ndarray, np.ndarray]: Tuple containing the post-processed bounding boxes,
+        their corresponding scores, and categories.
+    """
+    feat_sizes = np.array([80, 40, 20])
+    stride_sizes = np.array([8, 16, 32])
+    a, s = (x.transpose() for x in make_anchors_yolo_v8(feat_sizes, stride_sizes, 0.5))
+
+    y_bb, y_cls = outputs
+    dbox = dist2bbox_yolo_v8(y_bb, a, xywh=True, dim=1) * s
+    detect_out = np.concatenate((dbox, y_cls), 1)
+
+    xd = detect_out.transpose([0, 2, 1])
+
+    return combined_nms(xd[..., :4], xd[..., 4:84], iou_thres, conf, max_out_dets)
+
+
+def postprocess_yolov8_keypoints(outputs: Tuple[np.ndarray, np.ndarray, np.ndarray],
+                                 conf: float = 0.3,
+                                 iou_thres: float = 0.7,
+                                 max_out_dets: int = 300) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Postprocess the outputs of a YOLOv8 model for object detection and pose estimation.
+
+    Args:
+        outputs (Tuple[np.ndarray, np.ndarray, np.ndarray]): Tuple containing the model outputs for bounding boxes,
+        class predictions, and keypoint predictions.
+        conf (float, optional): Confidence threshold for bounding box predictions. Default is 0.3.
+        iou_thres (float, optional): IoU (Intersection over Union) threshold for Non-Maximum Suppression (NMS). Default is 0.7.
+        max_out_dets (int, optional): Maximum number of output detections to keep after NMS. Default is 300.
+
+    Returns:
+        Tuple[np.ndarray, np.ndarray, np.ndarray]: Tuple containing the post-processed bounding boxes, their
+        corresponding scores, and keypoints.
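+
+    Example (illustrative only; the random tensors stand in for real IMX500
+    outputs, assuming a 640x640 single-class pose model with 8400 anchors):
+        >>> y_bb = np.random.rand(1, 4, 8400).astype(np.float32)
+        >>> y_cls = np.random.rand(1, 1, 8400).astype(np.float32)
+        >>> y_kpts = np.random.rand(1, 51, 8400).astype(np.float32)
+        >>> boxes, scores, kpts = postprocess_yolov8_keypoints((y_bb, y_cls, y_kpts))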
+ + """ + kpt_shape = (17, 3) + feat_sizes = np.array([80, 40, 20]) + stride_sizes = np.array([8, 16, 32]) + a, s = (x.transpose() for x in make_anchors_yolo_v8(feat_sizes, stride_sizes, 0.5)) + + y_bb, y_cls, kpts = outputs + dbox = dist2bbox_yolo_v8(y_bb, a, xywh=True, dim=1) * s + detect_out = np.concatenate((dbox, y_cls), 1) + # additional part for pose estimation + ndim = kpt_shape[1] + pred_kpt = kpts.copy() + if ndim == 3: + pred_kpt[:, 2::3] = 1 / (1 + np.exp(-pred_kpt[:, 2::3])) # sigmoid (WARNING: inplace .sigmoid_() Apple MPS bug) + pred_kpt[:, 0::ndim] = (pred_kpt[:, 0::ndim] * 2.0 + (a[0] - 0.5)) * s + pred_kpt[:, 1::ndim] = (pred_kpt[:, 1::ndim] * 2.0 + (a[1] - 0.5)) * s + + x = np.concatenate([detect_out.transpose([2, 1, 0]).squeeze(), pred_kpt.transpose([2, 1, 0]).squeeze()], 1) + x = x[(x[:, 4] > conf)] + x = x[np.argsort(-x[:, 4])[:8400]] + x[..., :4] = convert_to_ymin_xmin_ymax_xmax_format(x[..., :4], BoxFormat.XC_YC_W_H) + boxes = x[..., :4] + scores = x[..., 4] + + # Original post-processing part + valid_indexs = nms(boxes, scores, iou_thres=iou_thres, max_out_dets=max_out_dets) + x = x[valid_indexs] + nms_bbox = x[:, :4] + nms_scores = x[:, 4] + nms_kpts = x[:, 5:] + + return nms_bbox, nms_scores, nms_kpts + + +def postprocess_yolov8_inst_seg(outputs: Tuple[np.ndarray, np.ndarray, np.ndarray], + conf: float = 0.001, + iou_thres: float = 0.7, + max_out_dets: int = 300) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + feat_sizes = np.array([80, 40, 20]) + stride_sizes = np.array([8, 16, 32]) + a, s = (x.transpose() for x in make_anchors_yolo_v8(feat_sizes, stride_sizes, 0.5)) + + y_bb, y_cls, ymask_weights, y_masks = outputs + dbox = dist2bbox_yolo_v8(y_bb, a, xywh=True, dim=1) * s + detect_out = np.concatenate((dbox, y_cls), 1) + + xd = detect_out.transpose([0, 2, 1]) + nms_bbox, nms_scores, nms_classes, ymask_weights = combined_nms_seg(xd[..., :4], xd[..., 4:84], + ymask_weights, iou_thres, conf, max_out_dets)[0] + if len(nms_scores) == 0: + final_masks = y_masks + else: + y_masks = y_masks.squeeze(0) + ymask_weights = ymask_weights.transpose(1, 0) + final_masks = np.tensordot(ymask_weights, y_masks, axes=([0], [0])) + + return nms_bbox, nms_scores, nms_classes, final_masks + + +def make_anchors_yolo_v8(feats, strides, grid_cell_offset=0.5): + """Generate anchors from features.""" + anchor_points, stride_tensor = [], [] + assert feats is not None + for i, stride in enumerate(strides): + h, w = feats[i], feats[i] + sx = np.arange(stop=w) + grid_cell_offset # shift x + sy = np.arange(stop=h) + grid_cell_offset # shift y + sy, sx = np.meshgrid(sy, sx, indexing='ij') + anchor_points.append(np.stack((sx, sy), -1).reshape((-1, 2))) + stride_tensor.append(np.full((h * w, 1), stride)) + return np.concatenate(anchor_points), np.concatenate(stride_tensor) + + +def dist2bbox_yolo_v8(distance, anchor_points, xywh=True, dim=-1): + """Transform distance(ltrb) to box(xywh or xyxy).""" + lt, rb = np.split(distance, 2, axis=dim) + x1y1 = anchor_points - lt + x2y2 = anchor_points + rb + if xywh: + c_xy = (x1y1 + x2y2) / 2 + wh = x2y2 - x1y1 + return np.concatenate((c_xy, wh), dim) # xywh bbox + return np.concatenate((x1y1, x2y2), dim) # xyxy bbox + + +def pad_with_zeros(mask, roi, isp_output_size): + new_shape = (isp_output_size.width, isp_output_size.height, mask.shape[2]) + padded_mask = np.zeros(new_shape, dtype=mask.dtype) + padded_mask[roi.x:roi.x + mask.shape[0], roi.y:roi.y + mask.shape[1], :] = mask + return padded_mask + + +def process_masks(masks, boxes, roi, isp_output_size): 
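+    """Crop, normalize, resize and pad instance masks.
+
+    Each mask is cropped to its bounding box, passed through a sigmoid,
+    resized to the ROI size and zero-padded up to the full ISP output size.
+    Masks are returned in channel-first order.
+    """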
+
+    # Crop masks based on bounding boxes
+    masks = crop_mask(masks, boxes)
+
+    # Apply sigmoid function to normalize masks into [0, 1]
+    masks = 1 / (1 + np.exp(-masks))
+    masks = np.transpose(masks, (2, 1, 0))  # Change from channel-first to channel-last layout
+
+    # Resize masks to the ROI size
+    masks = cv2.resize(masks, (roi.height, roi.width), interpolation=cv2.INTER_LINEAR)
+
+    # Restore the channel axis, which cv2.resize drops for a single mask
+    masks = np.expand_dims(masks, -1) if len(masks.shape) == 2 else masks
+
+    # Zero-pad masks up to the full ISP output size
+    masks = pad_with_zeros(masks, roi, isp_output_size)
+
+    masks = np.transpose(masks, (2, 1, 0))  # Change back to channel-first format
+    return masks
diff --git a/setup.py b/setup.py
index f1d8e6b8..a5acaa6d 100644
--- a/setup.py
+++ b/setup.py
@@ -30,9 +30,12 @@
         "Programming Language :: Python :: 3.9",
         "Topic :: Multimedia :: Graphics :: Capture :: Digital Camera",
     ],
-    packages=['picamera2', 'picamera2.devices', 'picamera2.devices.hailo', 'picamera2.devices.imx708',
-              'picamera2.encoders', 'picamera2.outputs', 'picamera2.previews', 'picamera2.allocators'],
+    packages=['picamera2', 'picamera2.devices', 'picamera2.devices.hailo', 'picamera2.devices.imx500',
+              'picamera2.devices.imx708', 'picamera2.encoders', 'picamera2.outputs', 'picamera2.previews',
+              'picamera2.allocators'],
     python_requires='>=3.9',
     licence='BSD 2-Clause License',
-    install_requires=['numpy', 'PiDNG', 'piexif', 'pillow', 'simplejpeg', 'v4l2-python3', 'python-prctl', 'av'],
+    install_requires=['numpy', 'PiDNG', 'piexif', 'pillow', 'simplejpeg', 'v4l2-python3',
+                      'python-prctl', 'av', 'libarchive-c', 'tqdm',
+                      'jsonschema'],
     extras_require={"gui": ['pyopengl', 'PyQt5']})
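
For a quick sanity check of the NumPy-based keypoint NMS helpers added in
postprocess_highernet.py, the following minimal sketch can be run standalone
(random data stands in for real heatmaps; the 144x192x17 shape follows the
shape comments in that file):

    import numpy as np

    from picamera2.devices.imx500.postprocess_highernet import nms, np_topk

    det = np.random.rand(144, 192, 17).astype(np.float32)  # stand-in heatmaps
    det_nms = nms(det, nms_kernel=5, nms_padding=2)        # zero out non-maxima
    # Top 30 candidate locations per joint, as consumed by top_k()
    vals, inds = np_topk(det_nms.reshape(1, -1, 17), k=30)
    assert vals.shape == (1, 30, 17) and inds.shape == (1, 30, 17)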