<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta name="generator" content="HTML Tidy for Linux/x86 (vers 11 February 2007), see www.w3.org">
<style type="text/css">
/* Design Credits: Jon Barron and Deepak Pathak and Abhishek Kar and Saurabh Gupta*/
a {
color: #1772d0;
text-decoration:none;
}
a:focus, a:hover {
color: #f09228;
text-decoration:none;
}
body,td,th {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 16px;
font-weight: 400
}
heading {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 17px; /* 19 */
font-weight: 600 /* 1000 */
}
hr
{
border: 0;
height: 1px;
background-image: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(0, 0, 0, 0.75), rgba(0, 0, 0, 0));
}
strong {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 16px;
font-weight: 600 /* 800 */
}
strongred {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
color: red;
font-size: 16px
}
sectionheading {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 22px;
font-weight: 600
}
pageheading {
font-family: 'Titillium Web', Verdana, Helvetica, sans-serif;
font-size: 38px;
font-weight: 400
}
.ImageBorder
{
border-width: 1px;
border-color: Black;
}
</style>
<link rel="shortcut icon" href="images/fav_icon.jpg">
<script type="text/javascript" src="js/hidebib.js"></script>
<title>Zipeng Fu</title>
<meta name="description" content="Zipeng Fu's Homepage">
<link href='https://fonts.googleapis.com/css?family=Titillium+Web:400,600,400italic,600italic,300,300italic' rel='stylesheet' type='text/css'>
<!-- Start : Google Analytics Code -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-XXXXX-Y', 'auto');
ga('send', 'pageview');
</script>
<!-- End : Google Analytics Code -->
<!-- Scramble Script by Jeff Donahue -->
<script src="js/scramble.js"></script>
</head>
<body>
<table width="900" border="0" align="center" border="0" cellspacing="0" cellpadding="20">
<tr><td>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="20">
<p align="center">
<pageheading>Zipeng Fu</pageheading><br>
</p>
<tr>
<td width="30%" valign="top"><a href="images/ZipengFu.jpg"><img src="images/ZipengFu.jpg" width="100%" style="border-radius:15px"></a>
<p align=center>
| <a href="data/ZipengFu_CV.pdf">CV</a> |
<a href="mailto:[email protected]">Email</a> |
<a href="https://scholar.google.com/citations?user=wMcPTbEAAAAJ&sortby=pubdate">G Scholar</a> |
<br/>
| <a href="https://github.com/MarkFzp">Github</a> |
<a href="https://twitter.com/zipengfu">Twitter</a> |
<a href="https://www.linkedin.com/in/zipengfu">LinkedIn</a> |
</p>
</td>
<td width="70%" valign="top" align="justify">
<p>
I am a final-year PhD student in computer science at <a href="https://ai.stanford.edu/">Stanford AI Lab</a>, advised by <a href="https://ai.stanford.edu/~cbfinn/">Chelsea Finn</a>.
I was a student researcher at Google DeepMind, working with <a href="http://www.jie-tan.net/">Jie Tan</a>.
My research is supported by the Pierre and Christine Lamond Fellowship.
</p>
<p>
Previously, I was a master's student in the Machine Learning Department and a student researcher in the Robotics Institute at CMU, advised by <a href="https://www.cs.cmu.edu/~dpathak">Deepak Pathak</a> and <a href="http://people.eecs.berkeley.edu/~malik/">Jitendra Malik</a>.
I completed my bachelor's in Computer Science and Applied Math at UCLA, advised by <a href="http://www.stat.ucla.edu/~sczhu/">Song-Chun Zhu</a>.
</p>
<p>
My research interests lie at the intersection of Robotics, Machine Learning, and Computer Vision. I care about robust robot performance and deployable robot systems in the unstructured open world.
</p>
</td>
</tr>
</table>
<hr/>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="10">
<tr><td><sectionheading> Publications</sectionheading></td></tr>
</table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="15">
<tr>
<td width="40%" valign="top" align="center"><a href="https://humanoid-ai.github.io">
<video playsinline autoplay loop muted src="images/humanplus-clip.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://humanoid-ai.github.io" id="HUMANPLUS">
<heading>HumanPlus: Humanoid Shadowing and Imitation from Humans</heading></a><br>
Zipeng Fu*, Qingqing Zhao*, Qi Wu*, Gordon Wetzstein, Chelsea Finn<br>
CoRL 2024<br>
<b style="color:rgb(255, 100, 100);">Best Paper Award Finalist (top 6)</b>
</p>
<div class="paper" id="humanplus">
<a href="https://humanoid-ai.github.io">webpage</a> |
<a href="https://humanoid-ai.github.io/HumanPlus.pdf">pdf</a> |
<a href="javascript:toggleblock('humanplus_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('humanplus')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2406.10454">arXiv</a> |
<a href="https://github.com/MarkFzp/humanplus">code</a> |
<a href="https://youtu.be/FPiyv7CIV6I">video</a>
<p align="justify"> <i id="humanplus_abs">One of the key arguments for building robots that have similar form factors to human beings is that we can leverage the massive human data for training. Yet, doing so has remained challenging in practice due to the complexities in humanoid perception and control, lingering physical gaps between humanoids and humans in morphologies and actuation, and lack of a data pipeline for humanoids to learn autonomous skills from egocentric vision. In this paper, we introduce a full-stack system for humanoids to learn motion and autonomous skills from human data. We first train a low-level policy in simulation via reinforcement learning using existing 40-hour human motion datasets. This policy transfers to the real world and allows humanoid robots to follow human body and hand motion in real time using only a RGB camera, i.e. shadowing. Through shadowing, human operators can teleoperate humanoids to collect whole-body data for learning different tasks in the real world. Using the data collected, we then perform supervised behavior cloning to train skill policies using egocentric vision, allowing humanoids to complete different tasks autonomously by imitating human skills. We demonstrate the system on our customized 33-DoF 180cm humanoid, autonomously completing tasks such as wearing a shoe to stand up and walk, unloading objects from warehouse racks, folding a sweatshirt, rearranging objects, typing, and greeting another robot with 60-100% success rates using up to 40 demonstrations.</i></p>
<pre xml:space="preserve">
@inproceedings{fu2024humanplus,
author = {Fu, Zipeng and Zhao, Qingqing
and Wu, Qi and Wetzstein, Gordon
and Finn, Chelsea},
title = {HumanPlus: Humanoid Shadowing
and Imitation from Humans},
booktitle = {Conference on Robot Learning ({CoRL})},
year = {2024}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://mobile-aloha.github.io">
<video playsinline autoplay loop muted src="images/mobile-aloha-clip.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://mobile-aloha.github.io" id="MOBILE_ALOHA">
<heading>Mobile ALOHA: Learning Bimanual Mobile Manipulation with Low-Cost Whole-Body Teleoperation</heading></a><br>
Zipeng Fu*, Tony Z. Zhao*, Chelsea Finn<br>
CoRL 2024<br>
</p>
<div class="paper" id="mobile_aloha">
<a href="https://mobile-aloha.github.io">webpage</a> |
<a href="https://mobile-aloha.github.io/resources/mobile-aloha.pdf">pdf</a> |
<a href="javascript:toggleblock('mobile_aloha_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('mobile_aloha')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2401.02117">arXiv</a> |
<a href="https://github.com/MarkFzp/mobile-aloha">code</a> |
<a href="https://youtu.be/zMNumQ45pJ8">video</a>
<p align="justify"> <i id="mobile_aloha_abs">Imitation learning from human demonstrations has shown impressive performance in robotics. However, most results focus on table-top manipulation, lacking the mobility and dexterity necessary for generally useful tasks. In this work, we develop a system for imitating mobile manipulation tasks that are bimanual and require whole-body control. We first present Mobile ALOHA, a low-cost and whole-body teleoperation system for data collection. It augments the ALOHA system with a mobile base, and a whole-body teleoperation interface. Using data collected with Mobile ALOHA, we then perform supervised behavior cloning and find that co-training with existing static ALOHA datasets boosts performance on mobile manipulation tasks. With 50 demonstrations for each task, co-training can increase success rates by up to 90%, allowing Mobile ALOHA to autonomously complete complex mobile manipulation tasks such as sauteing and serving a piece of shrimp, opening a two-door wall cabinet to store heavy cooking pots, calling and entering an elevator, and lightly rinsing a used pan using a kitchen faucet.</i></p>
<pre xml:space="preserve">
@inproceedings{fu2024mobile,
author = {Fu, Zipeng and
Zhao, Tony Z. and Finn, Chelsea},
title = {Mobile ALOHA: Learning Bimanual Mobile Manipulation
with Low-Cost Whole-Body Teleoperation},
booktitle = {Conference on Robot Learning ({CoRL})},
year = {2024}
}
</pre>
</div>
<p>
Media Coverage: <a href="https://www.economist.com/science-and-technology/2024/06/05/robots-are-suddenly-getting-cleverer-whats-changed">The Economist</a> |
<a href="https://www.technologyreview.com/2024/01/15/1086592/watch-this-robot-cook-shrimp-and-clean-autonomously/">MIT Technology Review</a> |
<a href="https://www.youtube.com/watch?v=HUgqyNgLf7U">TechCrunch</a> |
<a href="https://economictimes.indiatimes.com/tech/technology/google-deepmind-introduces-mobile-aloha-humanoid-system/articleshow/106569461.cms?from=mdr">The Economic Times</a> |
<a href="https://www.scmp.com/tech/tech-trends/article/3248330/stanford-students-win-hearts-china-us32000-household-robot-can-cook-shrimp-and-wash-pans">SCMP</a> |
<a href="https://biz.chosun.com/science-chosun/technology/2024/01/09/O6PVXEE4AJHBNAMJ3GU4XWCOFI/">Chosun Ilbo</a> |
<a href="https://news.cgtn.com/news/2024-01-08/Mobile-ALOHA-Stanford-s-housekeeping-robot-can-do-chores-1qc2tNgsyWI/p.html">CGTN</a> |
<a href="https://www.youtube.com/watch?v=z5KTSsUg2I8">CNet</a> |
<a href="https://news.stanford.edu/stories/2024/04/mobile-aloha-robot-is-a-quick-learner">Stanford Report</a> |
<a href="https://venturebeat.com/automation/stanfords-mobile-aloha-robot-learns-from-humans-to-cook-clean-do-laundry/">VentureBeat</a>
</p>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://arxiv.org/abs/2407.07775">
<video playsinline autoplay loop muted src="images/mobility-vla-clip.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/abs/2407.07775" id="mobility_vla">
<heading>Mobility VLA: Multimodal Instruction Navigation with Long-Context VLMs and Topological Graphs</heading></a><br>
Hao-Tien Lewis Chiang*, Zhuo Xu*, Zipeng Fu*, Mithun George Jacob, Tingnan Zhang, Tsang-Wei Edward Lee, Wenhao Yu, Connor Schenck, David Rendleman, Dhruv Shah, Fei Xia, Jasmine Hsu, Jonathan Hoech, Pete Florence, Sean Kirmani, Sumeet Singh, Vikas Sindhwani, Carolina Parada*, Chelsea Finn*, Peng Xu*, Sergey Levine*, Jie Tan*<br>
CoRL 2024<br>
</p>
<div class="paper" id="mobility_vla">
<a href="https://arxiv.org/pdf/2407.07775">pdf</a> |
<a href="javascript:toggleblock('mobility_vla_abs')">abstract</a> |
<a href="https://arxiv.org/abs/2401.02117">arXiv</a> |
<a href="https://youtu.be/-Tof__Q8_5s">video</a>
<p align="justify"> <i id="mobility_vla_abs">An elusive goal in navigation research is to build an intelligent agent that can understand multimodal instructions including natural language and image, and perform useful navigation. To achieve this, we study a widely useful category of navigation tasks we call Multimodal Instruction Navigation with demonstration Tours (MINT), in which the environment prior is provided through a previously recorded demonstration video. Recent advances in Vision Language Models (VLMs) have shown a promising path in achieving this goal as it demonstrates capabilities in perceiving and reasoning about multimodal inputs. However, VLMs are typically trained to predict textual output and it is an open research question about how to best utilize them in navigation. To solve MINT, we present Mobility VLA, a hierarchical Vision-Language-Action (VLA) navigation policy that combines the environment understanding and common sense reasoning power of long-context VLMs and a robust low-level navigation policy based on topological graphs. The high-level policy consists of a long-context VLM that takes the demonstration tour video and the multimodal user instruction as input to find the goal frame in the tour video. Next, a low-level policy uses the goal frame and an offline constructed topological graph to generate robot actions at every timestep. We evaluated Mobility VLA in a 836m^2 real world environment and show that Mobility VLA has a high end-to-end success rates on previously unsolved multimodal instructions such as "Where should I return this?" while holding a plastic bin. </i></p>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://umi-on-legs.github.io">
<video playsinline autoplay loop muted src="images/umi-on-legs-clip.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://umi-on-legs.github.io" id="umi_on_legs">
<heading>UMI on Legs: Making Manipulation Policies Mobile with Manipulation-Centric Whole-body Controllers</heading></a><br>
Huy Ha*, Yihuai Gao*, Zipeng Fu, Jie Tan, Shuran Song<br>
CoRL 2024<br>
</p>
<div class="paper" id="umi_on_legs">
<a href="https://umi-on-legs.github.io">webpage</a> |
<a href="https://umi-on-legs.github.io/static/umi-on-legs.pdf">pdf</a> |
<a href="javascript:toggleblock('umi_on_legs_abs')">abstract</a> |
<a href="https://arxiv.org/abs/2407.10353">arXiv</a> |
<a href="https://github.com/real-stanford/umi-on-legs">code</a> |
<a href="https://youtu.be/4Bp0q3xHTxE">video</a>
<p align="justify"> <i id="umi_on_legs_abs">We introduce UMI-on-Legs, a new framework that combines real-world and simulation data for quadruped manipulation systems. We scale task-centric data collection in the real world using a hand-held gripper (UMI), providing a cheap way to demonstrate task-relevant manipulation skills without a robot. Simultaneously, we scale robot-centric data in simulation by training whole-body controller for task-tracking without task simulation setups. The interface between these two policies is end-effector trajectories in the task frame, inferred by the manipulation policy and passed to the whole-body controller for tracking. We evaluate UMI-on-Legs on prehensile, non-prehensile, and dynamic manipulation tasks, and report over 70% success rate on all tasks. Lastly, we demonstrate the zero-shot cross-embodiment deployment of a pre-trained manipulation policy checkpoint from prior work, originally intended for a fixed-base robot arm, on our quadruped system. We believe this framework provides a scalable path towards learning expressive manipulation skills on dynamic robot embodiments. </i></p>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://robotics-transformer-x.github.io">
<video playsinline autoplay loop muted src="images/rtx-clip.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://robotics-transformer-x.github.io" id="rtx">
<heading>Open X-Embodiment: Robotic Learning Datasets and RT-X Models</heading></a><br>
Open X-Embodiment Collaboration led by Google DeepMind<br>
ICRA 2024<br>
<b style="color:rgb(255, 100, 100);">Best Paper Award</b>
</p>
<div class="paper" id="rtx">
<a href="https://robotics-transformer-x.github.io">webpage</a> |
<a href="https://arxiv.org/pdf/2310.08864">pdf</a> |
<a href="javascript:toggleblock('rtx_abs')">abstract</a> |
<a href="https://arxiv.org/abs/2310.08864">arXiv</a> |
<a href="https://github.com/google-deepmind/open_x_embodiment">code</a> |
<a href="https://deepmind.google/discover/blog/scaling-up-learning-across-many-different-robot-types/">blog</a>
<p align="justify"> <i id="rtx_abs">Large, high-capacity models trained on diverse datasets have shown remarkable successes on efficiently tackling downstream applications. In domains from NLP to Computer Vision, this has led to a consolidation of pretrained models, with general pretrained backbones serving as a starting point for many applications. Can such a consolidation happen in robotics? Conventionally, robotic learning methods train a separate model for every application, every robot, and even every environment. Can we instead train “generalist” X-robot policy that can be adapted efficiently to new robots, tasks, and environments? In this paper, we provide datasets in standardized data formats and models to make it possible to explore this possibility in the context of robotic manipulation, alongside experimental results that provide an example of effective X-robot policies. We assemble a dataset from 22 different robots collected through a collaboration between 21 institutions, demonstrating 527 skills (160266 tasks). We show that a high-capacity model trained on this data, which we call RT-X, exhibits positive transfer and improves the capabilities of multiple robots by leveraging experience from other platforms.</i></p>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://robot-parkour.github.io">
<video playsinline autoplay loop muted src="images/parkour-clip.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://robot-parkour.github.io" id="PARKOUR">
<heading>Robot Parkour Learning</heading></a><br>
Ziwen Zhuang*, Zipeng Fu*, Jianren Wang, Chris Atkeson, Sören Schwertfeger, Chelsea Finn, Hang Zhao<br>
CoRL 2023 (Oral)<br>
<b style="color:rgb(255, 100, 100);">Best Systems Paper Award Finalist (top 3)</b>
</p>
<div class="paper" id="parkour">
<a href="https://robot-parkour.github.io">webpage</a> |
<a href="https://robot-parkour.github.io/resources/Robot_Parkour_Learning.pdf">pdf</a> |
<a href="javascript:toggleblock('parkour_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('parkour')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2309.05665">arXiv</a> |
<a href="https://github.com/ZiwenZhuang/parkour">code</a> |
<a href="https://www.youtube.com/watch?v=M7lDCSF0KP0">video</a>
<p align="justify"> <i id="parkour_abs">Parkour is a grand challenge for legged locomotion that requires robots to overcome various obstacles rapidly in complex environments. Existing methods can generate either diverse but blind locomotion skills or vision-based but specialized skills by using reference animal data or complex rewards. However, autonomous parkour requires robots to learn generalizable skills that are both vision-based and diverse to perceive and react to various scenarios. In this work, we propose a system for learning a single end-to-end vision-based parkour policy of diverse parkour skills using a simple reward without any reference motion data. We develop a reinforcement learning method inspired by direct collocation to generate parkour skills, including climbing over high obstacles, leaping over large gaps, crawling beneath low barriers, squeezing through thin slits, and running. We distill these skills into a single vision-based parkour policy and transfer it to a quadrupedal robot using its egocentric depth camera. We demonstrate that our system can empower two different low-cost robots to autonomously select and execute appropriate parkour skills to traverse challenging real-world environments.</i></p>
<pre xml:space="preserve">
@inproceedings{zhuang2023robot,
author = {Zhuang, Ziwen and Fu, Zipeng and
Wang, Jianren and Atkeson, Christopher and
Schwertfeger, Sören and Finn, Chelsea and
Zhao, Hang},
title = {Robot Parkour Learning},
booktitle = {Conference on Robot Learning ({CoRL})},
year = {2023}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://manipulation-locomotion.github.io">
<video playsinline autoplay loop muted src="images/wbc-clip.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://manipulation-locomotion.github.io" id="MANIPLOCO">
<heading>Deep Whole-Body Control: Learning a Unified Policy for Manipulation and Locomotion</heading></a><br>
Zipeng Fu*, Xuxin Cheng*, Deepak Pathak<br>
CoRL 2022 (Oral)<br>
<b style="color:rgb(255, 100, 100);">Best Systems Paper Award Finalist (top 4)</b>
</p>
<div class="paper" id="maniploco">
<a href="https://manipulation-locomotion.github.io">webpage</a> |
<a href="https://arxiv.org/pdf/2210.10044.pdf">pdf</a> |
<a href="javascript:toggleblock('maniploco_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('maniploco')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2210.10044">arXiv</a> |
<a href="https://openreview.net/forum?id=zldI4UpuG7v">OpenReview</a> |
<a href="https://www.youtube.com/watch?v=i9EdPl8uJUA">video</a>
<p align="justify"> <i id="maniploco_abs">An attached arm can significantly increase the applicability of legged robots to several mobile manipulation tasks that are not possible for the wheeled or tracked counterparts. The standard control pipeline for such legged manipulators is to decouple the controller into that of manipulation and locomotion. However, this is ineffective and requires immense engineering to support coordination between the arm and legs, error can propagate across modules causing non-smooth unnatural motions. It is also biological implausible where there is evidence for strong motor synergies across limbs. In this work, we propose to learn a unified policy for whole-body control of a legged manipulator using reinforcement learning. We propose Regularized Online Adaptation to bridge the Sim2Real gap for high-DoF control, and Advantage Mixing exploiting the causal dependency in the action space to overcome local minima during training the whole-body system. We also present a simple design for a low-cost legged manipulator, and find that our unified policy can demonstrate dynamic and agile behaviors across several task setups.</i></p>
<pre xml:space="preserve">
@inproceedings{fu2022deep,
author = {Fu, Zipeng and Cheng, Xuxin and
Pathak, Deepak},
title = {Deep Whole-Body Control: Learning a Unified Policy
for Manipulation and Locomotion},
booktitle = {Conference on Robot Learning ({CoRL})},
year = {2022}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://navigation-locomotion.github.io/">
<video playsinline autoplay loop muted src="images/nav-clip.mp4" poster="./images/loading-icon.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://navigation-locomotion.github.io/" id="NAVLOCO">
<heading>Coupling Vision and Proprioception for Navigation of Legged Robots</heading></a><br>
Zipeng Fu*, Ashish Kumar*, Ananye Agarwal, Haozhi Qi, Jitendra Malik, Deepak Pathak<br>
CVPR 2022<br>
<b>Best Paper at Multimodal Learning Workshop</b>
</p>
<div class="paper" id="navloco">
<a href="https://navigation-locomotion.github.io/">webpage</a> |
<a href="https://arxiv.org/pdf/2112.02094.pdf">pdf</a> |
<a href="javascript:toggleblock('navloco_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('navloco')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2112.02094">arXiv</a> |
<a href="https://github.com/MarkFzp/navigation-locomotion">code</a> |
<a href="https://www.youtube.com/watch?v=sZVvutQUAQ4">video</a>
<p align="justify"> <i id="navloco_abs">We exploit the complementary strengths of vision and proprioception to develop a point-goal navigation system for legged robots, called VP-Nav. Legged systems are capable of traversing more complex terrain than wheeled robots, but to fully utilize this capability, we need a high-level path planner in the navigation system to be aware of the walking capabilities of the low-level locomotion policy in varying environments. We achieve this by using proprioceptive feedback to ensure the safety of the planned path by sensing unexpected obstacles like glass walls, terrain properties like slipperiness or softness of the ground and robot properties like extra payload that are likely missed by vision. The navigation system uses onboard cameras to generate an occupancy map and a corresponding cost map to reach the goal. A fast marching planner then generates a target path. A velocity command generator takes this as input to generate the desired velocity for the walking policy. A safety advisor module adds sensed unexpected obstacles to the occupancy map and environment-determined speed limits to the velocity command generator. We show superior performance compared to wheeled robot baselines, and ablation studies which have disjoint high-level planning and low-level control. We also show the real-world deployment of VP-Nav on a quadruped robot with onboard sensors and computation.</i></p>
<pre xml:space="preserve">
@inproceedings{fu2021coupling,
author = {Fu, Zipeng and Kumar, Ashish and
Agarwal, Ananye and Qi, Haozhi and
Malik, Jitendra and Pathak, Deepak},
title = {Coupling Vision and Proprioception
for Navigation of Legged Robots},
booktitle = {{CVPR}},
year = {2022}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://energy-locomotion.github.io/">
<video playsinline autoplay loop muted src="images/gait-clip.mp4" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
</a></td>
<td width="60%" valign="top">
<p><a href="https://energy-locomotion.github.io/" id="ENERGYLOCO">
<heading>Minimizing Energy Consumption Leads to the Emergence of Gaits in Legged Robots</heading></a><br>
Zipeng Fu, Ashish Kumar, Jitendra Malik, Deepak Pathak<br>
CoRL 2021
</p>
<div class="paper" id="energyloco">
<a href="https://energy-locomotion.github.io/">webpage</a> |
<a href="https://arxiv.org/pdf/2111.01674.pdf">pdf</a> |
<a href="javascript:toggleblock('energyloco_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('energyloco')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2111.01674">arXiv</a> |
<a href="https://openreview.net/forum?id=PfC1Jr6gvuP">OpenReview</a> |
<a href="https://www.youtube.com/watch?v=OQN5W2IAb9k">video</a>
<p align="justify"> <i id="energyloco_abs">Legged locomotion is commonly studied and expressed as a discrete set of gait patterns, like walk, trot, gallop, which are usually treated as given and pre-programmed in legged robots for efficient locomotion at different speeds. However, fixing a set of pre-programmed gaits limits the generality of locomotion. Recent animal motor studies show that these conventional gaits are only prevalent in ideal flat terrain conditions while real-world locomotion is unstructured and more like bouts of intermittent steps. What principles could lead to both structured and unstructured patterns across mammals and how to synthesize them in robots? In this work, we take an analysis-by-synthesis approach and learn to move by minimizing mechanical energy. We demonstrate that learning to minimize energy consumption is sufficient for the emergence of natural locomotion gaits at different speeds in real quadruped robots. The emergent gaits are structured in ideal terrains and look similar to that of horses and sheep. The same approach leads to unstructured gaits in rough terrains which is consistent with the findings in animal motor control. We validate our hypothesis in both simulation and real hardware across natural terrains.</i></p>
<pre xml:space="preserve">
@inproceedings{fu2021minimizing,
author = {Fu, Zipeng and Kumar, Ashish and Malik, Jitendra and Pathak, Deepak},
title = {Minimizing Energy Consumption Leads to the Emergence of Gaits in Legged Robots},
booktitle = {Conference on Robot Learning (CoRL)},
year = {2021}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://ashish-kmr.github.io/rma-legged-robots/">
<video playsinline autoplay loop muted src="images/rma-clip.mp4" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></video>
<!-- <img src="images/rma.gif" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"> -->
</a></td>
<td width="60%" valign="top">
<p><a href="https://ashish-kmr.github.io/rma-legged-robots/" id="RMA">
<!-- <img src="images/new.png" alt="[NEW]" width="6%" style="border-style: none"> -->
<heading>RMA: Rapid Motor Adaptation for Legged Robots</heading></a><br>
Ashish Kumar, Zipeng Fu, Deepak Pathak, Jitendra Malik<br>
RSS 2021
</p>
<div class="paper" id="rma">
<a href="https://ashish-kmr.github.io/rma-legged-robots/">webpage</a> |
<a href="https://arxiv.org/pdf/2107.04034.pdf">pdf</a> |
<a href="javascript:toggleblock('rma_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('rma')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/abs/2107.04034">arXiv</a> |
<a href="https://youtu.be/nBy1piJrq1A">video</a>
<p align="justify"> <i id="rma_abs">Successful real-world deployment of legged robots would require them to adapt in real-time to unseen scenarios like changing terrains, changing payloads, wear and tear. This paper presents Rapid Motor Adaptation (RMA) algorithm to solve this problem of real-time online adaptation in quadruped robots. RMA consists of two components: a base policy and an adaptation module. The combination of these components enables the robot to adapt to novel situations in fractions of a second. RMA is trained completely in simulation without using any domain knowledge like reference trajectories or predefined foot trajectory generators and is deployed on the A1 robot without any fine-tuning. We train RMA on a varied terrain generator using bioenergetics-inspired rewards and deploy it on a variety of difficult terrains including rocky, slippery, deformable surfaces in environments with grass, long vegetation, concrete, pebbles, stairs, sand, etc. RMA shows state-of-the-art performance across diverse real-world as well as simulation experiments.</i></p>
<pre xml:space="preserve">
@inproceedings{kumar2021rma,
author = {Kumar, Ashish and Fu, Zipeng and Pathak, Deepak and Malik, Jitendra},
title = {RMA: Rapid Motor Adaptation for Legged Robots},
booktitle = {Robotics: Science and Systems (RSS)},
year = {2021}
}
</pre>
</div>
<p>
Media Coverage: <a href="https://ai.facebook.com/blog/ai-now-enables-robots-to-adapt-rapidly-to-changing-real-world-conditions"> Facebook AI</a> |
<a href="https://www.wsj.com/articles/ai-gives-robots-more-room-to-roam-11625832001">Wall Street Journal</a> |
<a href="https://techcrunch.com/2021/07/09/stumble-proof-robot-adapts-to-challenging-terrain-in-real-time/?tpcc=ECTW2020">TechCrunch</a> |
<a href="https://www.forbes.com/sites/martineparis/2021/07/09/facebook-robots-are-getting-smarter-watch-them-in-the-wild-video/?sh=3997033644ef">Forbes</a> |
<a href="https://www.washingtonpost.com/technology/2021/07/12/facebook-ai-robotics/">Washington Post</a> |
<a href="https://www.darpa.mil/news-events/2022-22-06">DARPA</a> |
<a href="https://www.cnet.com/news/facebook-teaches-ai-powered-robot-to-adapt-while-walking/">CNet</a> |
<a href="https://www.jiqizhixin.com/articles/2021-07-11">Synced Review</a> |
<a href="https://engineering.berkeley.edu/news/2021/07/rapid-motor-adaptation-enables-robots-to-navigate-real-world/">UC Berkeley</a> |
<a href="https://www.cmu.edu/news/stories/archives/2021/july/legged-robots-adapt.html">CMU</a>
</p>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://arxiv.org/pdf/2110.00121.pdf"><img src="images/NeurIPS2019_CG_img1.png" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></a></td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/pdf/2110.00121.pdf" id="COLLAB_MARL">
<heading>Emergence of Theory of Mind Collaboration in Multiagent Systems</heading></a><br>
Luyao Yuan, Zipeng Fu, Linqi Zhou, Kexin Yang, Song-Chun Zhu<br>
Emergent Communication Workshop<br>
NeurIPS 2019
</p>
<div class="paper" id="collab_marl">
<a href="https://arxiv.org/pdf/2110.00121.pdf">pdf</a> |
<a href="javascript:toggleblock('collab_marl_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('collab_marl')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/pdf/2110.00121">arXiv</a> |
<a href="https://github.com/MarkFzp/ToM-Collaboration">code</a>
<p align="justify"> <i id="collab_marl_abs">Currently, in the study of multiagent systems, the intentions of agents are usually ignored. Nonetheless, as pointed out by Theory of Mind (ToM), people regularly reason about other’s mental states, including beliefs, goals, and intentions, to obtain performance advantage in competition, cooperation or coalition. However, due to its intrinsic recursion and intractable modeling of distribution over belief, integrating ToM in multiagent planning and decision making is still a challenge. In this paper, we incorporate ToM in multiagent partially observable Markov decision process (POMDP) and propose an adaptive training algorithm to develop effective collaboration between agents with ToM. We evaluate our algorithms with two games, where our algorithm surpasses all previous decentralized execution algorithms without modeling ToM.</i></p>
<pre xml:space="preserve">
@article{yuan2019emergencecollaboration,
author = {Yuan, Luyao and Fu, Zipeng and Zhou, Linqi and Yang, Kexin and Zhu, Song-Chun},
title = {Emergence of Theory of Mind Collaboration in Multiagent Systems},
journal = {Emergent Communication Workshop at NeurIPS},
year = {2019}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="https://arxiv.org/pdf/2001.07752.pdf"><img src="images/AAMAS2020_Pragmatics_img.png" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></a></td>
<td width="60%" valign="top">
<p><a href="https://arxiv.org/pdf/2001.07752.pdf" id="PRAGMATICS_MARL">
<heading>Emergence of Pragmatics from Referential Game between Theory of Mind Agents</heading></a><br>
Luyao Yuan, Zipeng Fu, Jingyue Shen, Lu Xu, Junhong Shen, Song-Chun Zhu<br>
Emergent Communication Workshop<br>
NeurIPS 2019
</p>
<div class="paper" id="pragmatics_marl">
<a href="https://arxiv.org/pdf/2001.07752.pdf">pdf</a> |
<a href="javascript:toggleblock('pragmatics_marl_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('pragmatics_marl')" class="togglebib">bibtex</a> |
<a href="https://arxiv.org/pdf/2001.07752">arXiv</a> |
<a href="https://github.com/MarkFzp/ToM-Pragmatics">code</a>
<p align="justify"> <i id="pragmatics_marl_abs">Pragmatics studies how context can contribute to language meanings. In human communication, language is never interpreted out of context, and sentences can usually convey more information than their literal meanings. However, this mechanism is missing in most multi-agent systems, restricting the communication efficiency and the capability of human-agent interaction. In this paper, we propose an algorithm, using which agents can spontaneously learn the ability to “read between lines” without any explicit hand-designed rules. We integrate theory of mind (ToM) in a cooperative multi-agent pedagogical situation and propose an adaptive reinforcement learning (RL) algorithm to develop a communication protocol. ToM is a profound cognitive science concept, claiming that people regularly reason about other’s mental states, including beliefs, goals, and intentions, to obtain performance advantage in competition, cooperation or coalition. With this ability, agents consider language as not only messages but also rational acts reflecting others hidden states. Our experiments demonstrate the advantage of pragmatic protocols over non-pragmatic protocols. We also show the teaching complexity following the pragmatic protocol empirically approximates to recursive teaching dimension (RTD).</i></p>
<pre xml:space="preserve">
@article{yuan2019emergencepragmatics,
author = {Yuan, Luyao and Fu, Zipeng and Shen, Jingyue and Xu, Lu and Shen, Junhong and Zhu, Song-Chun},
title = {Emergence of Pragmatics from Referential Game between Theory of Mind Agents},
journal = {Emergent Communication Workshop at NeurIPS},
year = {2019}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="data/AOG_MCTS.pdf"><img src="images/aog_mcts_img2.png" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></a></td>
<td width="60%" valign="top">
<p><a href="data/AOG_MCTS.pdf" id="AOG_MCTS">
<heading>Unsupervised Incremental Structure Learning of Stochastic And-Or Grammars with Monte Carlo Tree Search</heading></a><br>
Luyao Yuan, Jingyue Shen, Zipeng Fu, Song-Chun Zhu<br>
Preprint 2019
</p>
<div class="paper" id="aog_mcts">
<a href="data/AOG_MCTS.pdf">pdf</a> |
<a href="javascript:toggleblock('aog_mcts_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('aog_mcts')" class="togglebib">bibtex</a> |
<a href="https://github.com/MarkFzp/and-or-graph-lib">code (And-Or-Graph Learning Library)</a>
<p align="justify"><i id="aog_mcts_abs">Stochastic And-Or grammars form a compact representation of probabilistic contextfree grammars. They explicitly model compositionality and reconfigurability in a hierarchical manner and can be utilized to understand the underlying structures of different kinds of data (e.g., language, image, or video). In this paper, we proposed an unsupervised AndOr grammar learning approach that iteratively searches for better grammar structure and parameters to optimize the grammar compactness and data likelihood. To handle the complexity of grammar learning, we developed an algorithm based on the Monte Carlo Tree Search to effectively explore the search space. Also, our method enables incremental grammar learning. Experimental results show that our approach significantly outperforms previous greedy-search-based approaches, and our incremental learning results are comparable to previous batch learning results.</i></p>
<pre xml:space="preserve">
@article{yuan2019stochastic,
author = {Yuan, Luyao and Shen, Jingyue and Fu, Zipeng and Zhu, Song-Chun},
title = {Unsupervised Incremental Structure Learning of Stochastic And-Or Grammars with Monte Carlo Tree Search},
journal = {Preprint},
year = {2019}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="data/JNCS2019_Review.pdf"><img src="images/JNSC2019_Review_img.png" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></a></td>
<td width="60%" valign="top">
<p><a href="data/JNCS2019_Review.pdf" id="MATERIAL_REVIEW">
<heading>Machine Learning for Glass Science and Engineering: A Review</heading></a><br>
Han Liu, Zipeng Fu, Kai Yang, Xinyi Xu, Mathieu Bauchy<br>
Journal of Non-Crystalline Solids 2019
</p>
<div class="paper" id="material_review">
<a href="data/JNCS2019_Review.pdf">pdf</a> |
<a href="javascript:toggleblock('material_review_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('material_review')" class="togglebib">bibtex</a>
<p align="justify"><i id="material_review_abs">The design of new glasses is often plagued by poorly efficient Edisonian “trial-and-error” discovery approaches. As an alternative route, the Materials Genome Initiative has largely popularized new approaches relying on artificial intelligence and machine learning for accelerating the discovery and optimization of novel, advanced materials. Here, we review some recent progress in adopting machine learning to accelerate the design of new glasses with tailored properties.</i></p>
<pre xml:space="preserve">
@article{liu2019machine,
author = {Liu, Han and Fu, Zipeng and Yang, Kai and Xu, Xinyi and Bauchy, Mathieu},
title = {Machine Learning for Glass Science and Engineering: A Review},
journal = {Journal of Non-Crystalline Solids},
year = {2019}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="data/ACM_TURC2019_Adversarial.pdf"><img src="images/ACM_TURC2019_Adversarial_img.png" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></a></td>
<td width="60%" valign="top">
<p><a href="data/ACM_TURC2019_Adversarial.pdf" id="ACM_TURC">
<heading>Adversarial Attack Against Scene Recognition System for Unmanned Vehicles</heading></a><br>
Xuankai Wang, Mi Wen, Jinguo Li, Zipeng Fu, Rongxing Lu<br>
ACM TURC 2019
</p>
<div class="paper" id="acm_turc">
<a href="data/ACM_TURC2019_Adversarial.pdf">pdf</a> |
<a href="javascript:toggleblock('acm_turc_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('acm_turc')" class="togglebib">bibtex</a>
<p align="justify"><i id="acm_turc_abs">Unmanned scene recognition means that unmanned vehicles can collect environmental data from equipped sensors and make decisions through algorithms, in which deep learning has become one of key technologies. Especially, with the discovery of adversarial examples against deep learning, the research on offensive and defensive against adversarial examples illustrates that the deep learning model for unmanned scene recognition also has the safety vulnerability. However, as far as we know, few studies have tried to explore the adversarial example attack in this field. Therefore, we try to address this problem by generating adversarial examples againist scene recognition classification model through experiments. In addition, we also try to improve the adversarial model robustness by the adversarial training. Extensive experiments have been conducted, and experimental results show that adversarial examples have an efficient attack effect on the neural network for scene recognition.</i></p>
<pre xml:space="preserve">
@article{wang2019adversarial,
author = {Wang, Xuankai and Wen, Mi and Li, Jinguo and Fu, Zipeng and Lu, Rongxing},
title = {Adversarial Attack Against Scene Recognition System for Unmanned Vehicles},
journal = {ACM TURC},
year = {2019}
}
</pre>
</div>
</td>
</tr>
<tr>
<td width="40%" valign="top" align="center"><a href="data/IEEE_IoT2019_Energy.pdf"><img src="images/IEEE_IoT2019_Energy_img.png" alt="sym" width="90%" style="padding-top:0px;padding-bottom:0px;border-radius:15px;"></a></td>
<td width="60%" valign="top">
<p><a href="data/IEEE_IoT2019_Energy.pdf" id="IEEE_IoT">
<heading>Energy Theft Detection With Energy Privacy Preservation in the Smart Grid</heading></a><br>
Donghuan Yao, Mi Wen, Xiaohui Liang, Zipeng Fu, Kai Zhang, Baojia Yang<br>
IEEE IoT Journal 2019
</p>
<div class="paper" id="ieee_iot">
<a href="data/IEEE_IoT2019_Energy.pdf">pdf</a> |
<a href="javascript:toggleblock('ieee_iot_abs')">abstract</a> |
<a shape="rect" href="javascript:togglebib('ieee_iot')" class="togglebib">bibtex</a>
<p align="justify"><i id="ieee_iot_abs">As a prominent early instance of the Internet of Things in the smart grid, the advanced metering infrastructure (AMI) provides real-time information from smart meters to both grid operators and customers, exploiting the full potential of demand response. However, the newly collected information without security protection can be maliciously altered and result in huge loss. In this paper, we propose an energy theft detection scheme with energy privacy preservation in the smart grid. Especially, we use combined convolutional neural networks (CNNs) to detect abnormal behavior of the metering data from a long-period pattern observation. In addition, we employ Paillier algorithm to protect the energy privacy. In other words, the users’ energy data are securely protected in the transmission and the data disclosure is minimized. Our security analysis demonstrates that in our scheme data privacy and authentication are both achieved. Experimental results illustrate that our modified CNN model can effectively detect abnormal behaviors at an accuracy up to 92.67%.</i></p>
<pre xml:space="preserve">
@article{yao2019energy,
author = {Yao, Donghuan and Wen, Mi and Liang, Xiaohui and Fu, Zipeng and Zhang, Kai and Yang, Baojia},
title = {Energy Theft Detection With Energy Privacy Preservation in the Smart Grid},
journal = {IEEE Internet of Things Journal},
year = {2019}
}
</pre>
</div>
</td>
</tr>
</table>
<hr/>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="2">
<tr><td><br><p align="right">
Website template from <a href="http://www.cs.berkeley.edu/~barron/">here</a> and <a href="http://www.cs.cmu.edu/~dpathak/">here</a>
</p></td></tr>
</table>
</td></tr>
</table>
<script type="text/javascript">
// Collapse all bibtex entries and abstracts on page load; the links above toggle them.
hideallbibs();
hideblock('material_review_abs');
hideblock('ieee_iot_abs');
hideblock('acm_turc_abs');
hideblock('aog_mcts_abs');
hideblock('pragmatics_marl_abs');
hideblock('collab_marl_abs');
hideblock('rma_abs');
hideblock('energyloco_abs');
hideblock('navloco_abs');
hideblock('maniploco_abs');
hideblock('parkour_abs');
hideblock('mobile_aloha_abs');
hideblock('humanplus_abs');
hideblock('mobility_vla_abs');
hideblock('umi_on_legs_abs');
hideblock('rtx_abs');
</script>
</body>
</html>