forked from SigWeber/DurableSolutions_simulation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfull_report.Rmd
3500 lines (2728 loc) · 297 KB
/
full_report.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---
title: Towards a harmonized statistical measure for exits from the stock of internally
  displaced persons
date: "`r format(Sys.time(), '%B %d, %Y')`"
output:
  pdf_document:
    toc: yes
    keep_tex: no
    fig_caption: yes
    latex_engine: pdflatex
    template: design/preamble.tex
linkcolor: blue!50!black
link-citations: yes
geometry: margin=1in
fontsize: 11pt
spacing: single
always_allow_html: yes
---
```{r setup, include=FALSE}
library(tidyverse)
library(hrbrthemes)
library(patchwork)
library(kableExtra)
library(DiagrammeR)
library(targets)
# Global chunk defaults for the whole report: hide code, messages and
# warnings; write chunk output verbatim ("asis"); cache chunk results; and
# centre figures. TRUE/FALSE are spelled out because the shorthands T and F
# are ordinary variables that can be reassigned.
knitr::opts_chunk$set(
  echo = FALSE,
  message = FALSE,
  warning = FALSE,
  results = "asis",
  cache = TRUE,
  fig.align = "center"
)
```
```{r nigeria_targets}
# Simulation results for Nigeria, one object per metric option.
# "Original" and options 1-5 are read from the targets store; option 6 is
# read from a CSV file under ecdf/. The Durable_Solutions column is dropped
# from every table.

# Standard variants
DS_Original_nigeria <- targets::tar_read("DS_Original_nigeria") %>%
  select(-Durable_Solutions)
DS_Option1_nigeria <- targets::tar_read("DS_Option1_nigeria") %>%
  select(-Durable_Solutions)
DS_Option2_nigeria <- targets::tar_read("DS_Option2_nigeria") %>%
  select(-Durable_Solutions)
DS_Option3_nigeria <- targets::tar_read("DS_Option3_nigeria") %>%
  select(-Durable_Solutions)
DS_Option4_nigeria <- targets::tar_read("DS_Option4_nigeria") %>%
  select(-Durable_Solutions)
DS_Option5_nigeria <- targets::tar_read("DS_Option5_nigeria") %>%
  select(-Durable_Solutions)
DS_Option6_nigeria <- read_csv("ecdf/nigeria.csv") %>%
  select(-Durable_Solutions)

# Variants carrying the `_nohlp` suffix
DS_Original_nohlp_nigeria <- targets::tar_read("DS_Original_nohlp_nigeria") %>%
  select(-Durable_Solutions)
DS_Option1_nohlp_nigeria <- targets::tar_read("DS_Option1_nohlp_nigeria") %>%
  select(-Durable_Solutions)
DS_Option2_nohlp_nigeria <- targets::tar_read("DS_Option2_nohlp_nigeria") %>%
  select(-Durable_Solutions)
DS_Option3_nohlp_nigeria <- targets::tar_read("DS_Option3_nohlp_nigeria") %>%
  select(-Durable_Solutions)
DS_Option4_nohlp_nigeria <- targets::tar_read("DS_Option4_nohlp_nigeria") %>%
  select(-Durable_Solutions)
DS_Option5_nohlp_nigeria <- targets::tar_read("DS_Option5_nohlp_nigeria") %>%
  select(-Durable_Solutions)
DS_Option6_nohlp_nigeria <- read_csv("ecdf/nigeria_nohlp.csv") %>%
  select(-Durable_Solutions)
```
```{r hargeisa_targets}
# Simulation results for Hargeisa, one object per metric option.
# "Original" and options 1-5 are read from the targets store; option 6 is
# read from a CSV file under ecdf/. The Durable_Solutions column is dropped
# from every table.

# Standard variants
DS_Original_hargeisa <- targets::tar_read("DS_Original_hargeisa") %>%
  select(-Durable_Solutions)
DS_Option1_hargeisa <- targets::tar_read("DS_Option1_hargeisa") %>%
  select(-Durable_Solutions)
DS_Option2_hargeisa <- targets::tar_read("DS_Option2_hargeisa") %>%
  select(-Durable_Solutions)
DS_Option3_hargeisa <- targets::tar_read("DS_Option3_hargeisa") %>%
  select(-Durable_Solutions)
DS_Option4_hargeisa <- targets::tar_read("DS_Option4_hargeisa") %>%
  select(-Durable_Solutions)
DS_Option5_hargeisa <- targets::tar_read("DS_Option5_hargeisa") %>%
  select(-Durable_Solutions)
DS_Option6_hargeisa <- read_csv("ecdf/hargeisa.csv") %>%
  select(-Durable_Solutions)

# Variants carrying the `_nohlp` suffix
DS_Original_nohlp_hargeisa <- targets::tar_read("DS_Original_nohlp_hargeisa") %>%
  select(-Durable_Solutions)
DS_Option1_nohlp_hargeisa <- targets::tar_read("DS_Option1_nohlp_hargeisa") %>%
  select(-Durable_Solutions)
DS_Option2_nohlp_hargeisa <- targets::tar_read("DS_Option2_nohlp_hargeisa") %>%
  select(-Durable_Solutions)
DS_Option3_nohlp_hargeisa <- targets::tar_read("DS_Option3_nohlp_hargeisa") %>%
  select(-Durable_Solutions)
DS_Option4_nohlp_hargeisa <- targets::tar_read("DS_Option4_nohlp_hargeisa") %>%
  select(-Durable_Solutions)
DS_Option5_nohlp_hargeisa <- targets::tar_read("DS_Option5_nohlp_hargeisa") %>%
  select(-Durable_Solutions)
DS_Option6_nohlp_hargeisa <- read_csv("ecdf/hargeisa_nohlp.csv") %>%
  select(-Durable_Solutions)
```
```{r sudan_targets}
# Simulation results for Sudan, one object per metric option.
# "Original" and options 1-5 are read from the targets store; option 6 is
# read from a CSV file under ecdf/. The Durable_Solutions column is dropped
# from every table.

# Standard variants
DS_Original_sudan <- targets::tar_read("DS_Original_sudan") %>%
  select(-Durable_Solutions)
DS_Option1_sudan <- targets::tar_read("DS_Option1_sudan") %>%
  select(-Durable_Solutions)
DS_Option2_sudan <- targets::tar_read("DS_Option2_sudan") %>%
  select(-Durable_Solutions)
DS_Option3_sudan <- targets::tar_read("DS_Option3_sudan") %>%
  select(-Durable_Solutions)
DS_Option4_sudan <- targets::tar_read("DS_Option4_sudan") %>%
  select(-Durable_Solutions)
DS_Option5_sudan <- targets::tar_read("DS_Option5_sudan") %>%
  select(-Durable_Solutions)
DS_Option6_sudan <- read_csv("ecdf/sudan.csv") %>%
  select(-Durable_Solutions)

# Variants carrying the `_nohlp` suffix
DS_Original_nohlp_sudan <- targets::tar_read("DS_Original_nohlp_sudan") %>%
  select(-Durable_Solutions)
DS_Option1_nohlp_sudan <- targets::tar_read("DS_Option1_nohlp_sudan") %>%
  select(-Durable_Solutions)
DS_Option2_nohlp_sudan <- targets::tar_read("DS_Option2_nohlp_sudan") %>%
  select(-Durable_Solutions)
DS_Option3_nohlp_sudan <- targets::tar_read("DS_Option3_nohlp_sudan") %>%
  select(-Durable_Solutions)
DS_Option4_nohlp_sudan <- targets::tar_read("DS_Option4_nohlp_sudan") %>%
  select(-Durable_Solutions)
DS_Option5_nohlp_sudan <- targets::tar_read("DS_Option5_nohlp_sudan") %>%
  select(-Durable_Solutions)
DS_Option6_nohlp_sudan <- read_csv("ecdf/sudan_nohlp.csv") %>%
  select(-Durable_Solutions)
```
```{r colombia_targets}
# Simulation results for Colombia, one object per metric option.
# "Original" and options 1-5 are read from the targets store; option 6 is
# read from a CSV file under ecdf/. The Durable_Solutions column is dropped
# from every table. The objects created here carry an `_hh` suffix that the
# target and file names do not have.
DS_Original_colombia_hh <- targets::tar_read("DS_Original_colombia") %>%
  select(-Durable_Solutions)
DS_Option1_colombia_hh <- targets::tar_read("DS_Option1_colombia") %>%
  select(-Durable_Solutions)
DS_Option2_colombia_hh <- targets::tar_read("DS_Option2_colombia") %>%
  select(-Durable_Solutions)
DS_Option3_colombia_hh <- targets::tar_read("DS_Option3_colombia") %>%
  select(-Durable_Solutions)
DS_Option4_colombia_hh <- targets::tar_read("DS_Option4_colombia") %>%
  select(-Durable_Solutions)
DS_Option5_colombia_hh <- targets::tar_read("DS_Option5_colombia") %>%
  select(-Durable_Solutions)
DS_Option6_colombia_hh <- read_csv("ecdf/colombia.csv") %>%
  select(-Durable_Solutions)
```
\newpage
# Executive summary
The *International Recommendations on IDP Statistics* (IRIS), developed by the *Expert Group on Refugee, IDP and Statelessness Statistics* (EGRISS), provide a framework for capturing a country's stock of IDPs for statistical purposes. To correctly calculate the stock, it is crucial to define when IDP households and individuals enter the stock and when they exit the stock. This note relates to the latter, and specifically to the way by which households exit the stock by overcoming key displacement-related vulnerabilities - sometimes referred to colloquially as the "solutions measure". The IRIS have advanced the debate on a statistical measure on solutions considerably. Nevertheless, as stated in the recommendations themselves, these achievements do not yet result in a final and applicable statistical measure which can be applied directly in the work of statistical producers.
After revisiting the conceptual framework for the solutions measure set out in the IRIS, the paper identifies 9 methodological challenges that remain to allow the IRIS solutions measure to be fully applicable in statistical production. The paper then makes concrete recommendations for how EGRISS can refine and further mature the solutions measure to become a fully workable statistical instrument.
+ Challenge 1: Selection of a statistical metric
+ *Summary*: Different metric options are available to implement the two IRIS requirements that IDP households (1) meet each of the 10 sub-criteria capturing the overcoming of key displacement-related vulnerabilities and that IDP households are (2) compared to a benchmark population. The available metric options range from (1) a full composite index, (2) a composite metric at the criterion level, (3) a composite index at sub-criterion level, (4) a homogeneous cell approach, (5) a regression-based approach to (6) a cumulative distribution approach. Metric options that fully implement the requirement to pass IDP households on each sub-criterion (options 3+4) yield very few exits from the stock while options that average across all sub-criteria yield a higher number of exits (options 1, 5, 6). Option 2 requires further development to be made fully applicable.
+ *Recommendation*: EGRISS should not consider a composite index at the sub-criterion level (data needs too high) or a regression-based approach (not easy to compare). A composite measure at the criterion level and a homogeneous cell approach require further investigation but the homogeneous cell approach is unlikely to work well in practice. A full composite index can only be considered if EGRISS clearly standardizes the set of indicators to make an assessment and identifies if any sub-criteria must be passed (e.g. freedom to move). If feasible for national statistical offices, an alternative is an approach based on empirical cumulative distributions.
+ Challenge 2: Selection of statistical indicators
+ *Summary*: The simulations show that the indicator selection, i.e. the question which indicators to choose to measure the 10 sub-criteria, matters less for the overall exit from the IDP stock than identifying a metric to combine the different indicators. Nevertheless, missing data, difficulties in aggregating to the household level, and non-applicability of indicators can create differences in the exit from the IDP stock, suggesting a need to standardize indicators across contexts.
+ *Recommendation*: It is suggested that EGRISS standardizes the indicators to operationalize the 10 sub-criteria as much as possible to increase comparability across contexts, relying on a selection of the SDG indicators and the Durable Solutions Library indicators. This note proposes a selection of indicators.
\newpage
+ Challenge 3: The property restitution and compensation sub-criterion
+ *Summary*: Out of the ten sub-criteria currently specified in the IRIS solutions measure, the sub-criterion *4.1 Property restitution and compensation* stands out as particularly restrictive in terms of allowing any exits from the IDP stock due to the fact that restitution or compensation mechanisms exist only in a very limited number of cases across the world. A direct implication of this is that the IRIS solutions measure, as it currently stands, will barely allow any exits from the IDP stock in practice.
+ *Recommendation*: It is suggested that the focus on HLP restitution is reinterpreted as security of tenure for the solutions measure. This would require re-labeling the sub-criterion and operationalizing it in a way that is applicable across displacement contexts.
+ Challenge 4: Dealing with missing data
+ *Summary*: IRIS posits that no assessment can be made and thus that households should remain in the IDP stock if data is missing for any of the solution measure’s sub-criteria. This is problematic given that a sizeable proportion of households – almost a third of households across the simulations - are affected by missing data. In the vast majority of cases, this data is missing “by design”, i.e. for perfectly valid reasons such as a household without children not being asked about current school attendance of children.
+ *Recommendation*: For data missing “by design”, the IRIS recommendation should be adjusted and it should be clarified that missing data due to valid skip patterns and non-applicability should be interpreted as the absence of vulnerability in that particular indicator.
+ Challenge 5: Data aggregation to the household level
+ *Summary*: Related to the problem of missing data is the fact that some of the sub-criteria are more naturally measured on the individual-level than the household-level. To make a meaningful assessment of the household-level exit of IDPs from the overall stock, it should be made clear how data collected from individual respondents can be aggregated to the household level. Potential aggregation rules are that the household passes if all household members achieve a pass, or that it passes if at least one member achieves a pass.
+ *Recommendation*: It is recommended that the IRIS should be adjusted and a specification should be added to explain how each sub-criterion should be aggregated to the household level.
+ Challenges 6-9:
+ *Summary*: Beyond the challenges assessed in this report, the IRIS also has to address how to deal with statistical uncertainty in the target/benchmark values (challenge 6), how to define the final comparator population (challenge 7), how to factor assistance into the solutions measure (challenge 8) and how to deal with changing benchmark values over time (challenge 9).
+ *Recommendation*: It is recommended to revisit these problems once more clarity on challenges 1-5 is achieved. This can include methodological analyses on the difference between low, best, and high estimates of IDP exits from the stock (to address challenge 6), the difference between host community and national averages in the same displacement contexts (to address challenge 7), and the change in IDP exits over time in panel data (to address challenge 9). Regarding the challenge of how to define the comparator population, a nationally representative benchmark is favorable compared to host community benchmarks.
\newpage
# Introduction
The *International Recommendations on IDP Statistics* (IRIS), developed by the *Expert Group on Refugee, IDP and Statelessness Statistics* (EGRISS), provide a framework for capturing a country's stock of IDPs for statistical purposes. To correctly calculate the stock, it is crucial to define when individuals enter the stock and when they exit the stock. This paper relates to the latter, and specifically to the way by which individuals exit the stock by overcoming key displacement-related vulnerabilities - sometimes referred to colloquially as the "IRIS solutions measure".
It is important to note that the purpose of a statistical definition for the "end of displacement" is explicitly **not** to identify specific households or individuals that are no longer displaced (e.g. for programming or assistance purposes) but only to enable aggregate statistics on the amount of internal displacement in each country that can be compared globally. Separate from the "solutions measure", IRIS also proposes a distinct but related framework to assess progress towards the achievement of durable solutions - colloquially referred to as the "progress measure". The progress measure aims to allow decision-makers "to understand at a glance in which aspects of vulnerability IDPs are struggling compared to others [...] and in which they are doing relatively well" (IRIS, page 55). While progress and solution can likely be measured with similar statistical indicators, their aims and application are very different: informing decisions on policy areas that need attention as well as more granular assistance-related decisions (through the IRIS progress measure) vs informing aggregate statistics on IDP stocks (IRIS solutions measure).
This note focuses on the IRIS solutions measure and how this measure can be implemented in practice. The IRIS have advanced the debate on a statistical measure on solutions considerably, including in the following ways:
- Specification of 5 priority criteria and 10 priority sub-criteria that should form the basis of a statistical measure
- Move to a context-dependent assessment of durable solutions rather than an absolute approach
- Narrowing of benchmark options (to national averages or host community averages)
- Recommendation of quality criteria to assess the suitability of indicators to measure each sub-criterion
\noindent Nevertheless, as stated in the recommendations themselves, the above achievements do not result in a final and applicable statistical measure which can be applied directly in the work of statistical producers. This methodological paper first outlines the conceptual framework for the solutions measure as suggested by the IRIS and identifies the remaining methodological challenges that persist in turning this into a fully applicable statistical measure. The main body of the paper then makes concrete suggestions, and provides decision points for EGRISS on how to overcome these challenges. The suggestions in this paper are backed up by empirical data from Hargeisa, Nigeria, Colombia and Sudan. Considering the absence of a concrete statistical measure at this stage, a simulation approach - cycling through all foreseeable metric and indicator choices for the solutions measure - is applied to illustrate exit from the IDP stock.
# State of play on the "IRIS solutions measure"
## The conceptual framework: ten sub-criteria and two principles
Based on the *Framework for Durable Solutions by the Inter-Agency Standing Committee* (IASC), the IRIS specifies a set of 5 priority criteria and 10 sub-criteria that need to be measured in every displacement context to take IDPs out of the national stock as having overcome their displacement-related vulnerabilities (see Table \ref{tab:IASC}). The indicators for each of the 10 sub-criteria should be collected at the household or individual level.
```{r}
# Lookup table of the five IASC priority criteria and their ten sub-criteria.
# A criterion label appears only on the first row of its group; the other
# rows of that group hold a single-space placeholder.
IASC <- data.frame(
  Criteria = c(
    "1. Safety and security",
    " ",
    "2. Adequate standard of living",
    " ",
    " ",
    " ",
    "3. Access to livelihoods",
    " ",
    "4. Restoration of housing, land and property",
    "5. Documentation"
  ),
  Subcriteria = c(
    "1.1 Victims of violence",
    "1.2 Freedom of movement",
    "2.1 Food security",
    "2.2 Shelter and housing",
    "2.3 Medical services",
    "2.4 Education",
    "3.1 Employment and livelihoods",
    "3.2 Economic security",
    "4.1 Property restitution and compensation",
    "5.1 Documentation"
  )
)

# Render the table as LaTeX; the caption embeds the label used by
# \ref{tab:IASC} in the surrounding text.
IASC %>%
  knitr::kable(
    format = "latex",
    booktabs = TRUE,
    linesep = "",
    caption = "\\label{tab:IASC}IASC durable solution criteria and identified sub-criteria"
  ) %>%
  kable_styling(latex_options = "hold_position", font_size = 10)
```
\noindent To then assess how many displaced households in a specific context have progressed towards a durable solution, the situation of IDPs must be compared with that of the comparator population (national average or "host community" average). The logic is that IDPs that perform similarly to or better than the comparator population in each sub-criterion are no longer considered displaced for the purpose of statistics.^[Note that this framework is used to count the aggregate number of IDPs in a given context. It is not used to assess the allocation of aid to an individual or household.] The IRIS promulgates this as follows:
> *\footnotesize "The methodology used for calculating the composite measure is a simple pass/no pass (or binary) scenario at sub-criteria level that is then accumulated to produce a score at criteria level, and ultimately an overall score for the measure. [...] **To determine if a sub-criterion has been overcome or not, for each household, a target needs to be set** [...] It is foreseen that target setting will be more complex with categorical or binary indicators [...] If all sub-criteria receive a 'pass' mark, then that criterion [...] has been overcome. For criteria with multiple sub-criteria, **all sub-criteria would need to receive a 'pass' mark for the criteria to be overcome**. All of the five key-displacement related vulnerabilities (the five criteria) need to achieve a 'pass' mark for the composite measure to be fulfilled.[...] It is recommended to use the general/national population as the comparison group when deciding on the targets or thresholds for scoring each sub-criteria [...] In specific circumstances, thresholds can be set through a comparison with the average situation of a subset of the general population."* (IRIS, page 58-61)
\noindent To sum it up, the IRIS established that the solutions measure should be based on an assessment of the ten sub-criteria, and should follow two key principles, namely (a) benchmarking against a comparator, and (b) achieving a pass in *each* of the sub-criteria.
Table \ref{tab:example} provides an example to understand this framework. Household A passes the criteria *1. Safety & security*, *4. Restoration of HLP rights*, and *5. Documentation*. However, because it has not passed the two sub-criteria in *3. Access to livelihoods*, it does not pass this criterion. The household also does not pass the criterion *2. Adequate standard of living* because it has not passed the sub-criterion *2.4 Education* and the framework requires passing all sub-criteria. Overall, no durable solution has been achieved in this example and the household should not exit the IDP stock.
\begin{table}[htp!]
\centering
\footnotesize
\caption{Example household in the IRIS framework}
\label{tab:example}
\begin{tabular}{llll}
\toprule
& \multicolumn{2}{c}{Sub-criterion (compared to a benchmark)} & Result on criterion level \\
\midrule
\textbf{Household A} & 1.1 Victims of violence & Pass & 2/2 (Passed) \\
& 1.2 Freedom of movement & Pass & \\
\cmidrule{2-4}
& 2.1 Food security & Pass & 3/4 (Not passed) \\
& 2.2 Shelter and housing & Pass & \\
& 2.3 Medical services & Pass & \\
& 2.4 Education & No pass & \\
\cmidrule{2-4}
& 3.1 Employment \& livelihoods & No pass & 0/2 (Not passed) \\
& 3.2 Economic security & No pass & \\
\cmidrule{2-4}
& 4.1 Property restitution \& compensation & Pass & 1/1 (Passed) \\
\cmidrule{2-4}
& 5.1 Documentation & Pass & 1/1 (Passed) \\
\midrule
& \multicolumn{3}{l}{\textbf{Overall:} No durable solution achieved: displacement not ended} \\
\bottomrule
\end{tabular}
\end{table}
## Nine remaining methodological challenges
While this framework appears intuitive, there are several challenges and methodological gaps when implementing it in practice.
\noindent The first methodological challenge arises from the above-cited assumption that it is possible to set a comparator target/benchmark for each sub-criterion which a household needs to match or surpass. The IRIS states that “target setting will be more complex with categorical or binary indicators”; however, **such benchmark comparison is not just complex but indeed technically meaningless for binary and categorical indicators. Binary and categorical household-level data points cannot be directly compared with average/distribution values in the comparator population**. To illustrate this problem, imagine a binary indicator "household has access to medical services" was selected to operationalize sub-criterion *2.3 Medical services*. The value for any given household can only be yes or no, 1 or 0. The target value derived from the comparator population, in contrast, would be a percentage on a scale, let us assume 5% of the comparator population have access to medical services. Achieving a pass for any given household in comparison with the target value can only be achieved by achieving a "yes" in this indicator – despite the fact that access to medical services is extremely low in the comparator population. In effect, the value of the comparator population is irrelevant, because the condition for achieving a pass would not have changed if the comparator value had been, say, 1% or 90%. In other words, the entire idea of a contextualized target/benchmark that underlies the IRIS approach is devalued for binary or categorical indicators. Unfortunately, such types of indicators appear to be the most pragmatic and thematically valid choice of metric under the vast majority of sub-criteria.
The second methodological challenge is that **no set of statistical indicators has been agreed on to capture the 10 sub-criteria**. The lack of an agreed, measurable set of indicators presents a real challenge in operationalizing the IRIS framework. While this methodological assessment aims to shed some light on how specific indicators affect the exit from the IDP stock, further empirical work will be needed to assess how the indicator choice affects the aggregate stock of IDPs in a displacement context. Eventually, more suitable datasets will need to be produced to facilitate this.
A third challenge relates to one of the ten sub-criteria of the solutions measure, namely *4.1 Property restitution and compensation*. An implicit assumption in examining access to restitution or compensation mechanisms is that such mechanisms are in place in displacement contexts. Experts on housing, land and property confirm that this is only the case in a limited number of cases across the world, and even where those mechanisms exist, there are issues in terms of equitable access to them and their effectiveness. Given the principles discussed above, this effectively bars households from exiting the IDP stock in the vast majority of countries. A decision on the **operationalization or the reframing of this sub-criterion** may be needed to develop a realistic solutions measure.
Even if a set of statistical indicators can be agreed on to operationalize the 10 sub-criteria, it is unlikely that data will be available for all selected indicators in all situations. The IRIS specifies that if data is missing an assessment of whether displacement has been overcome cannot be made. This results in a fourth methodological challenge where data points are missing by design (e.g. households without children cannot be assessed for whether children currently attend school). Further empirical work provides an opportunity to assess and further explore the feasibility of this approach and gain greater insight on the aggregate **effects of missing data points**.
A fifth methodological challenge is the unclear guidance on the **aggregation of individual-level indicators to the household-level**. Some of the sub-criteria promoted for the solutions measure are more naturally measured on the individual-level than the household-level. For example, employment is easiest to assess on the individual-level. There is hence a need to develop clear rules on how different indicators should be aggregated to the household-level at which the assessment and comparison with national/host community averages takes place.
The sixth, unaddressed methodological challenge arises if the **comparator value comes with a level of statistical uncertainty**. Assume the benchmark value for the national/host population was produced using a sample survey – it would thereby come with a confidence interval around the comparator point estimate. It is not clear yet if a displaced household would have to perform as well as or better than the benchmark's point estimate or whether displaced households should "just" perform as well as or better than the lower bound of the confidence interval in the benchmark population. Deciding on an approach towards statistical uncertainty is crucial as the IDP stock estimates in most countries are not derived from a full mapping of the complete IDP population but from sampled surveys of displaced and non-displaced households. How to deal with levels of statistical uncertainty is relevant for binary but also metric indicators.
A seventh area left open by IRIS is the **definition of the comparator population** itself – national or "host". If a host community is to be used as comparator, a clear definition of the term will need to be developed. While it may be preferable for IRIS to *not* provide a standardized recommendation on this and leave flexibility to data producers, further empirical work is recommended to at least assess how the choice of the comparator population affects the aggregate results across different contexts, and facilitate an evidence-informed decision.
As the eighth challenge, the IRIS solution measure should address to what extent assistance received should be "factored out / imputed out" before an assessment is made of how many IDPs exit the stock. For example, if IDPs overcome key-displacement related vulnerabilities because their shelter and housing is provided through humanitarian assistance, they may exit the stock without actually having overcome their housing-related vulnerabilities. While the solutions measure only produces an aggregate number, the measure should ensure that the overall exit from the IDP stock is not based on the **overcoming of vulnerabilities through humanitarian assistance** but through sustainable solutions.
Related to the definition of a comparator population is the ninth challenge, on how to deal with **changing benchmark values over time**. If, for example, unemployment in the host community/national population drops from one stock assessment to the next due to improving economic conditions in a country, this increases the benchmark value, and IDPs that have previously been taken out of the stock as having overcome their vulnerabilities may fall under the comparator benchmark again, re-entering and thereby enlarging the stock without the occurrence of any new displacement-causing events.
\begin{story}[ht]
\framedbox{
\textbf{Challenge 1:} Find a way to make the benchmarking against target population statistically applicable
\textbf{Challenge 2:} Specify one or several statistical indicators for each of the 10 sub-criteria
\textbf{Challenge 3:} Address sub-criterion on property restitution and compensation
\textbf{Challenge 4:} Specify how missing data points should be dealt with
\textbf{Challenge 5:} Clarify the aggregation of individual-level indicators to the household level
\textbf{Challenge 6:} Address how to deal with statistical uncertainty in the target/benchmark values
\textbf{Challenge 7:} Define the comparator population
\textbf{Challenge 8:} Specify how assistance is factored into the solutions measure
\textbf{Challenge 9:} Address changing benchmark values over time
}
\caption[]{Remaining methodological challenges for a workable solutions metrics}
\label{box:box1}
\end{story}
# Methodology
```{r load_data}
# Run the simulation script first; the statements below rely on objects that
# are not defined in this chunk (extract_indicators(), IASC) -- presumably
# provided by simulations.R, confirm.
source("simulations.R")

# For each empirical context: read the household data from the {targets}
# store, then derive its indicator set with extract_indicators().
data_nigeria <- targets::tar_read("data_nigeria")
indicators_nigeria <- extract_indicators(data_nigeria)

data_hargeisa <- targets::tar_read("data_hargeisa")
indicators_hargeisa <- extract_indicators(data_hargeisa)

data_sudan <- targets::tar_read("data_sudan")
indicators_sudan <- extract_indicators(data_sudan)

data_colombia <- targets::tar_read("data_colombia")
indicators_colombia <- extract_indicators(data_colombia)

# Number of indicators per element of the Sudan / Colombia indicator sets.
# Kept as separate objects because zeros are spliced into them below.
sudan_helper <- sapply(indicators_sudan, length)
colombia_helper <- sapply(indicators_colombia, length)

# Overview table: one row per IRIS sub-criterion plus a final row holding the
# number of possible indicator combinations (rows of expand.grid()).
DS <- data.frame(
  "Subcriteria in IRIS" = c(
    IASC$Subcriteria,
    "Number of possible combinations"
  ),
  # Hard-coded counts; the trailing " " leaves the combinations cell blank
  # (and turns the whole column into character).
  "Durable Solutions \nLibrary" = c(7, 1, 1, 8, 4, 3, 8, 10, 6, 4, " "),
  "IDP Profiling \nin Hargeisa" = c(
    sapply(indicators_hargeisa, length),
    nrow(expand.grid(indicators_hargeisa))
  ),
  "IDP Profiling \nin Nigeria" = c(
    sapply(indicators_nigeria, length),
    nrow(expand.grid(indicators_nigeria))
  ),
  # A zero is inserted as the second entry -- presumably the sub-criterion
  # without any indicator in this dataset; confirm against the data.
  "IDP Profiling \nin El Fasher" = c(
    sudan_helper[1],
    0,
    sudan_helper[2:9],
    nrow(expand.grid(indicators_sudan))
  ),
  # Zeros are inserted as the second and ninth entries -- presumably the
  # sub-criteria without any indicator in this dataset; confirm.
  "LSMS \nin Colombia" = c(
    colombia_helper[1],
    0,
    colombia_helper[2:7],
    0,
    colombia_helper[8],
    nrow(expand.grid(indicators_colombia))
  )
)
```
```{r cell_options}
# Candidate cell definitions per dataset: every unordered triple of the
# HH_-prefixed columns. combn() returns one triple per column, so the result
# is transposed to get one triple per row.
cells_hargeisa <- data_hargeisa %>%
  select(starts_with("HH_")) %>%
  names() %>%
  combn(3) %>%
  t()

cells_nigeria <- data_nigeria %>%
  select(starts_with("HH_")) %>%
  names() %>%
  combn(3) %>%
  t()

cells_colombia <- data_colombia %>%
  select(starts_with("HH_")) %>%
  names() %>%
  combn(3) %>%
  t()
```
This paper provides actionable suggestions for overcoming the above challenges, and aims to substantiate all suggestions with empirical evidence. Given that - for now - the solutions measure is not yet statistically workable, the empirical evidence on effects of any proposal on the aggregate IDP stock cannot be based on a single "solutions measure estimate" because this does not yet exist. To overcome this, we took stock of all potential metric choices and all potential indicator choices, and ran iterative simulations of all combinations thereof (see Figure \ref{fig:sim_approach}). We selected four empirical contexts (Hargeisa, Nigeria, Colombia, Sudan). In each dataset, 1000 possible combinations of 10 indicators (one for each of the sub-criteria) were assessed using all five metric options outlined further below. The results - both in terms of the mean and distribution of exits from the stock across combinations - are highly indicative and insightful for understanding the effect of a proposed solution on the aggregate exits from the IDP stock, without having to decide for one or the other option. Annex [I](#annex-i-detailed-simulations-methodology) provides further details on the simulation methodology.
![Simulation approach \label{fig:sim_approach}](design/sim_approach.png){width=95%}
```{r flow_diagram, eval=F}
# DO NOT RUN! Produces html output
# Graphviz (DOT) source for the simulation-approach flow chart, drawn left to
# right: the dataset fans out into indicator combinations; the first
# combination is expanded into the pass/fail measure and metric options 1-5,
# each ending in a "report number of IDPs" node, while the remaining
# combinations are summarised by a single "iterate" node each.
# NOTE(review): the report embeds design/sim_approach.png instead of running
# this chunk -- presumably that image was exported from this code; confirm.
grViz("
digraph dot {
graph [layout = dot, rankdir = LR]
node [shape = Rounded,
style = filled,
color = grey,
fontname= Arial]
node [fillcolor = dodgerBlue4]
Dataset
node [fillcolor = dodgerBlue3]
b[label= 'Combination 1\nof 10 indicators']
node [fillcolor = gray55]
c[label= 'Combination 2 \nof 10 indicators']
d[label= 'Combination ... \nof 10 indicators']
node [fillcolor = dodgerBlue]
e[label = 'Pass/fail']
f[label = 'Metrics option 1']
g[label = 'Metrics option 2']
h[label = 'Metrics option 3']
i[label = 'Metrics option 4']
j[label = 'Metrics option 5']
node [fillcolor = gray80]
k[label = 'Iterate through the pass/fail \nand the 1-5 metrics options']
l[label = 'Iterate through the pass/fail \nand the 1-5 metrics options']
node [fillcolor = white, label = 'Report number of IDPs overcoming vulnerabilities']
m n o p q r
node [fillcolor = gray95]
s[label = 'Report number of IDPs overcoming vulnerabilities for each metric']
t[label = '...']
edge [color = grey]
Dataset -> {b c d}
b -> {e f g h i j}
c -> {k}
d -> {l}
e -> {m}
f -> {n}
g -> {o}
h -> {p}
i -> {q}
j -> {r}
k -> {s}
l -> {t}
}")
```
# Overcoming challenge 1: Addressing the selection of a statistical metric
A necessary next step towards an applicable metric is to address how the various sub-criteria are combined and then compared to the national population or the host community in a statistically applicable way ("challenge 1" in the above list). The overall aim is to be able to compare an IDP household to a benchmark on all 10 sub-criteria to decide whether a household performs the same or even better than the national average or the host community and should hence no longer be counted as IDP household in official statistics. As this comparability is currently not technically applicable in the IRIS solutions metric, different metric options - approaches to combine indicators and compare them with a benchmark population - are available to develop the IRIS solution measure.
## Metric options
### Pass/fail measure: Implementing the IRIS without a comparator population ^[For completeness and to allow comparisons with other metric options, this note also demonstrates simulations based on this approach but will not discuss the suitability of a pass/fail measure as option going forward.]
The current measure described in IRIS does not allow for a meaningful comparison between IDPs and host communities/national averages. One could make the decision to fully focus on a pass/fail decision on the sub-criterion level (as demonstrated in Table \ref{tab:example} above) by only assessing whether a household achieves the sub-criterion. This is not a desirable option because no comparison takes place, and it thereby fails to comply with the fundamental logic set out in IRIS for a solutions measure, which is that it should be assessed relative to a host/national benchmark. A consequence of a context-blind pass/fail approach is that this option is likely to produce very low numbers of IDPs overcoming their vulnerabilities because achieving a ‘one’ for all 10 indicators is a hard threshold in many displacement contexts.
### Option 1: An actual composite metric across all criteria
The approach currently outlined in IRIS is referred to as "composite" measure but an actual composite measure sums up indicators to one score. Because IRIS prescribes that a pass needs to be achieved in each sub-criterion/indicator and a single indicator per sub-criterion is implicitly assumed, the current IRIS solutions measure is strictly speaking not a composite measure.
Hence, one option to explore that aligns with the contextualized approach would be to redefine the overall framework as an actual composite index. This would mean adding up all indicators across all criteria to one score (see an illustration in Table \ref{tab:option1}). This household level index could then be more readily compared to the average value of the same composite index in the comparator population. The shortcoming of this approach is that a household may be taken out of the IDP stock despite underperformance on a specific key criterion or sub-criterion (which is a deviation from the IRIS requirement that a pass needs to be achieved at the sub-criterion level).\footnote{One way to address this shortcoming could be to create a "hybrid" composite measure. For example, one could imagine a full composite index for the criteria 2, 3 and 5 while the more rights-based criteria 1 (freedom to move) and 4 (property restitution) are scored as a pass or no pass.}
\begin{table}[!htp]
\caption{\label{tab:option1}Option 1: A full composite metric for all sub-criteria}
\centering
\footnotesize
\begin{tabular}[t]{llll}
\toprule
& Sub-criteria & Indicator & \\
\midrule
Household A & 1.1 Victims of violence & Indicator 1.1.a & 1\\
& 1.2 Freedom of movement & Indicator 1.2.a & 1\\
& 2.1 Food security & Indicator 2.1.a & 1\\
& 2.2 Shelter and housing & Indicator 2.2.a & 1\\
& 2.3 Medical services & Indicator 2.3.a & 1\\
& 2.4 Education & Indicator 2.4.a & 0\\
& 3.1 Employment and livelihoods & Indicator 3.1.a & 0\\
& 3.2 Economic security & Indicator 3.2.a & 0\\
& 4.1 Property restitution and compensation & Indicator 4.1.a & 1\\
& 5.1 Documentation & Indicator 5.1.a & 1\\
\midrule
& Total composite index for the household & & 7/10\\
& Benchmark average & & 8.6/10\\
\midrule
\multicolumn{4}{l}{\rule{0pt}{1em}\textbf{Decision:} The household has not overcome displacement as the composite index is below the benchmark.}\\
\bottomrule
\end{tabular}
\end{table}
\newpage
### Option 2: A set of composite indices at the criterion level
Rather than construct a single index across all criteria, one could define composite indices for each criterion (see Table \ref{tab:option2}). For example, the composite index for the criterion on *2. Adequate standard of living* could consist of four or more equally weighted indicators corresponding to the four related sub-criteria (*2.1 Food security, 2.2 Shelter and housing, 2.3 Medical services, 2.4 Education*). A household could then score values of 0-4 on this sub-criterion index (or 0%, 25%, 50%, 75%, 100%). This score can be compared with a distribution average in the comparator population for the same composite index. As with option 1 above, this would constitute a deviation from IRIS, which explicitly states that a pass needs to be achieved at the sub-criterion level; under this option a household may be taken out of the IDP stock despite underperformance on a specific sub-criterion.
In order to construct an index in each criterion, at least 2 binary indicators per criterion would be required, but more would be recommended. A small number of indicators per criterion would result in a cruder measure, thereby barely overcoming challenge 1 (as we will still be comparing discrete values – e.g. 0;1;2 in the case of 2 indicators for a given criterion - with a continuous distribution average; so for a given household to perform on par or better than the benchmark, it would be irrelevant whether the benchmark value was, say, 0.1 or 0.9 – in either case the household in question would need to score a 1 to achieve a pass on this criterion). Assuming at least 3 indicators per each of the 5 criteria (and 4 indicators in the case of criterion 2, in alignment with its 4 sub-criteria), this would result in an overall computation based on at least 16 indicators.
\begin{table}[!htpb]
\caption{\label{tab:option2}Option 2: A composite metric at the criterion level}
\centering
\footnotesize
\scalebox{0.9}{
\begin{tabularx}{\textwidth}{lXXllX}
\toprule
& Criterion & Sub-criteria & Indicator & Composite & Comparison to benchmark (population average) \\
\midrule
Household A & \multirow{3}{\hsize}{1. Safety and security} & \multirow{2}{\hsize}{1.1 Victims of violence} & Indicator 1.1.a & 2/3 & 1.9 (Pass because \\
& & & Indicator 1.1.b & & 2 > 1.9)\\
& & 1.2 Freedom of movement & Indicator 1.2.a & & \\
\midrule
& \multirow{4}{\hsize}{2. Adequate standard of living} & 2.1 Food security & Indicator 2.1.a & 3/4 & 3.5 (No pass because \\
& & 2.2 Shelter and housing & Indicator 2.2.a & & 3 < 3.5) \\
& & 2.3 Medical services & Indicator 2.3.a & & \\
& & 2.4 Education & Indicator 2.4.a & & \\
\midrule
& \multirow{3}{\hsize}{3. Access to livelihoods} & \multirow{2}{\hsize}{3.1 Employment and livelihoods} & Indicator 3.1.a & 2/3 & 1.9 (Pass because \\
& & & Indicator 3.1.b & & 2 > 1.9) \\
& & 3.2 Economic security & Indicator 3.2.a & & \\
\midrule
& \multirow{3}{\hsize}{4. Restoration of housing, land and property} & \multirow{3}{\hsize}{4.1 Property restitution and compensation} & Indicator 4.1.a & 1/3 & 0.9 (Pass because \\
& & & Indicator 4.1.b & & 1 > 0.9) \\
& & & Indicator 4.1.c & & \\
& \multirow{3}{\hsize}{5. Access to documentation} & \multirow{3}{\hsize}{5.1 Documentation} & Indicator 5.1.a & 0/3 & 0.9 (No pass because \\
& & & Indicator 5.1.b & & 0 < 0.9) \\
& & & Indicator 5.1.c & & \\
\midrule
& \multicolumn{5}{>{\hsize=\dimexpr4\hsize+6\tabcolsep+3\arrayrulewidth\relax}X}{\textbf{Decision:} The household has not overcome displacement-related vulnerabilities as the composite index for some criteria is lower than the benchmark.}\\
\bottomrule
\end{tabularx}
}
\end{table}
\newpage
### Option 3: A set of composite sub-indices at the sub-criterion level
Very similar to the second approach, one could define composite indices within each sub-criterion (see Table \ref{tab:option3}). For example, the composite index for sub-criterion *2.1 Food security* could be associated with 3 equally weighted binary indicators. A household could then score values of 0-3 on this sub-criterion index (or 0%, 33%, 66%, 100%). This score can be compared with a distribution average in the comparator population for the same composite index. The major difference between this option and options 1&2 above is that this option would not constitute a methodological deviation from IRIS, in that it would ensure that a pass is achieved at the sub-criterion level for a household to be taken out of the IDP stock.
However, there is a serious feasibility issue with this option. As with the preceding option, at least 2 binary indicators are required per sub-criterion to construct an index, but more would be recommended. A small number of indicators per sub-criterion would result in a cruder measure, thereby barely overcoming challenge 1 (as we will still be comparing discrete values – e.g. 0;1;2 in the case of 2 indicators for a given sub-criterion - with a continuous distribution average; so for a given household to perform on par or better than the benchmark, it would be irrelevant whether the benchmark value was, say, 0.1 or 0.9 – in either case the household in question would need to score a 1 to achieve a pass on this criterion). Assuming at least 3 indicators per each of the 10 sub-criteria, this would result in an overall computation based on at least 30 indicators – which appears overly burdensome for widespread application.
\begin{table}[!htbp]
\caption{\label{tab:option3}Option 3: A composite metric at the sub-criterion level}
\centering
\scalebox{0.55}{
\begin{tabularx}{1.7\textwidth}{lXXllX}
\toprule
& Sub-criteria & Indicators & & Composite & Comparison to benchmark \\
\midrule
Household A & \multirow{3}{\hsize}{1.1 Victims of violence} & Indicator 1.1.a & 1 & 3 & 2.8 (Pass)\\
& & Indicator 1.1.b & 1 & & \\
& & Indicator 1.1.c & 1 & & \\
\midrule
& \multirow{3}{\hsize}{1.2 Freedom of movement} & Indicator 1.2.a & 1 & 3 & 2.5 (Pass) \\
& & Indicator 1.2.b & 1 & & \\
& & Indicator 1.2.c & 1 & & \\
\midrule
& \multirow{3}{\hsize}{2.1 Food security} & Indicator 2.1.a & 1 & 2 & 1.1 (Pass)\\
& & Indicator 2.1.b & 0 & & \\
& & Indicator 2.1.c & 1 & & \\
\midrule
& \multirow{3}{\hsize}{2.2 Shelter and housing} & Indicator 2.2.a & 1 & 2 & 1.9 (Pass) \\
& & Indicator 2.2.b & 0 & & \\
& & Indicator 2.2.c & 1 & & \\
\midrule
&\multirow{3}{\hsize}{ 2.3 Medical services} & Indicator 2.3.a & 1 & 3 & 3 (Pass) \\
& & Indicator 2.3.b & 1 & & \\
& & Indicator 2.3.c & 1 & & \\
\midrule
&\multirow{3}{\hsize}{2.4 Education} & Indicator 2.4.a & 1 & 1 & 2.5 (No pass) \\
& & Indicator 2.4.b & 0 & & \\
& & Indicator 2.4.c & 0 & & \\
\midrule
& \multirow{3}{\hsize}{3.1 Employment and livelihoods} & Indicator 3.1.a & 0 & 0 & 1.5 (No pass) \\
& & Indicator 3.1.b & 0 & & \\
& & Indicator 3.1.c & 0 & & \\
\midrule
& \multirow{3}{\hsize}{3.2 Economic security} & Indicator 3.2.a & 0 & 1 & 2.9 (No pass) \\
& & Indicator 3.2.b & 0 & & \\
& & Indicator 3.2.c & 1 & & \\
\midrule
& \multirow{3}{\hsize}{4.1 Property restitution and compensation} & Indicator 4.1.a & 1 & 3 & 2.5 (Pass)\\
& & Indicator 4.1.b & 1 & & \\
& & Indicator 4.1.c & 1 & & \\
\midrule
& \multirow{3}{\hsize}{5.1 Documentation} & Indicator 5.1.a & 0 & 2 & 1.8 (Pass)\\
& & Indicator 5.1.b & 1 & & \\
& & Indicator 5.1.c & 1 & & \\
\midrule
& \multicolumn{5}{>{\hsize=\dimexpr4\hsize+6\tabcolsep+3\arrayrulewidth\relax}X}{\textbf{Decision:} The household has not overcome displacement as some composite indices on the sub-criterion level are lower than the benchmark.}\\
\bottomrule
\end{tabularx}
}
\end{table}
\newpage
### Option 4: Comparison of homogeneous cells:
Rather than trying to create a continuous indicator value at the household level through composite indices as in the first three options, an alternative approach could be to divide the IDP population into small homogeneous "cells" – for example by location of displacement, area of habitual residence and year of arrival (see Table \ref{tab:option4}). Even if selecting just one indicator per sub-criterion (i.e. 10 indicators for the measure overall), for each cell an average proportion achieving a pass could be calculated which subsequently can be compared against the distribution average in the comparator benchmark. The decision whether displacement has ended is no longer made on the individual household level but on the cell level. If an IDP cell has achieved a durable solution in comparison to the benchmark, all IDPs in this group are taken out of the stock of IDPs. If the cell has not achieved a durable solution, all IDPs remain as IDPs in the stock.
The advantage of this option is that it is the only one to truly overcome challenge 1 and fully deliver to the contextualized approach proposed by IRIS, in that it would be comparing continuous cell values with benchmark continuous values. The shortcoming of this option is that the aggregate results may depend heavily on the criteria chosen for dividing IDP populations into cells. A strong assumption of homogeneity in the cells is necessary.
\begin{table}[!htp]
\caption{\label{tab:option4}Option 4: Comparison of homogeneous cells}
\centering
\footnotesize
\scalebox{0.9}{
\begin{tabularx}{\textwidth}{XXlXX}
\toprule
& Sub-criteria & Indicator & Percentages in cell Z & Benchmark comparison \\
\midrule\multirow{10}{\hsize}{Cell Z composed of different IDP households (e.g. based on current location, area of origin and year of arrival)} & 1.1 Victims of violence & Indicator 1.1.a & 29\% pass indicator in cell Z & 28\% pass indicator in population (Pass) \\
& 1.2 Freedom of movement & Indicator 1.2.a & 26\% & 25\% (Pass) \\
& 2.1 Food security & Indicator 2.1.a & 2.2\% & 2\% (Pass) \\
& 2.2 Shelter and housing & Indicator 2.2.a & 19\% & 19\% (Pass) \\
& 2.3 Medical services & Indicator 2.3.a & 3.5\% & 3\% (Pass)\\
& 2.4 Education & Indicator 2.4.a & 1\% & 3.5\% (No pass)\\
& 3.1 Employment and livelihoods & Indicator 3.1.a & 12\% & 15\% (No pass)\\
& 3.2 Economic security & Indicator 3.2.a & 25\% & 29\% (No pass)\\
& 4.1 Property restitution and compensation & Indicator 4.1.a & 31\% & 25\% (Pass)\\
& 5.1 Documentation & Indicator 5.1.a & 35\% & 18\% (Pass)\\
\midrule
& \multicolumn{4}{>{\hsize=\dimexpr4\hsize+6\tabcolsep+3\arrayrulewidth\relax}X}{\textbf{Decision:} All households in cell Z (that includes household A) have not overcome displacement as their average distribution in some sub-criteria is lower than the average in the benchmark.}\\
\bottomrule
\end{tabularx}
}
\end{table}
\begin{story}[htpb!]
\framedbox{
\textbf{Using key-informant interviews and qualitative data collections for the solutions measure}
\bigskip
Many data collections in contexts of forced displacement use key informant interviews, focus groups and other qualitative strategies. These data collections, such as the Displacement Tracking Matrix by the International Organization for Migration, are crucial and reliable data sources for humanitarians and rapid response teams. However, for the production of official statistics on IDP exits from the stock a representative sample of IDP households is required that can be compared with national or host community averages. Nevertheless, data on the community-level, as often generated through key informants, can be a valuable step in the transition to a fully comparable micro-level solutions measure. In the homogeneous cell approach (Option 4), interviews with community leaders and other informants could be used to identify if particular IDP subgroups and communities perform well on the 10 sub-criteria if no household-level or individual-level data is available or data collections are infeasible. For example, information on the freedom to move may be informative on the community-level until household- and individual-level data is produced.
}
\caption[]{The role of qualitative data collections in metric option 4}
\label{box:IOM}
\end{story}
\newpage
### Option 5: Classifier/regression-based approach
Another option for the solutions measure could be to take a regression-based approach in which the 10 sub-criteria are used as covariates to predict whether a household should still be classified as an IDP household or not (see Table \ref{tab:option5}). A probabilistic classifier, such as a logistic regression, would estimate whether an IDP household is distinct from the host community (i.e. high probability to be an IDP) or whether an IDP household is very similar to households in the host community (i.e. low probability to be an IDP).
Important decisions to make before implementing this option are which classifier to select, how to select probability cut-off points to determine whether a specific household is similar to IDPs or to hosts, and how to deal with sample imbalance in the data. While this approach would overcome the challenge of comparing IDP households with host community households, this approach needs further clarifications and is not sensitive to underperformance on specific indicators.
\begin{table}[!htp]
\caption{\label{tab:option5}Option 5: A classifier/regression-based approach}
\centering
\footnotesize
\scalebox{0.9}{
\begin{tabularx}{\textwidth}{XlXXX}
\toprule
& Sub-criteria & Indicator & & Regression weights \\
\midrule
Household A & 1.1 Victims of violence & Indicator 1.1.a & 1 Pass & -1.58\\
& 1.2 Freedom of movement & Indicator 1.2.a & 1 Pass & -0.04\\
& 2.1 Food security & Indicator 2.1.a & 1 Pass & 1.59\\
& 2.2 Shelter and housing & Indicator 2.2.a & 1 Pass & -2.24\\
& 2.3 Medical services & Indicator 2.3.a & 1 Pass & 0.48 \\
& 2.4 Education & Indicator 2.4.a & 0 No Pass & 4.48\\
& 3.1 Employment and livelihoods & Indicator 3.1.a & 0 No Pass & 6.72\\
& 3.2 Economic security & Indicator 3.2.a & 0 No Pass & 2.13\\
& 4.1 Property restitution and compensation & Indicator 4.1.a & 1 Pass & 4.32\\
& 5.1 Documentation & Indicator 5.1.a & 1 Pass & -0.22\\
\midrule
& \multicolumn{2}{l}{\rule{0pt}{1em}Predicted probability for household (e.g. logit transformed)} & & 0.439 \\
& \multicolumn{2}{l}{\rule{0pt}{1em}Cut-off point} & & 0.5 \\
\midrule
& \multicolumn{4}{>{\hsize=\dimexpr3\hsize+30\tabcolsep+10\arrayrulewidth\relax}X}{\textbf{Decision:} The household has not overcome displacement as the predicted probability of being similar to the host community is below the cut-off point.}\\
\bottomrule
\end{tabularx}
}
\end{table}
### Option 6: Empirical cumulative distribution approach
Another approach is to focus on the multidimensional empirical cumulative distribution (eCDF). In line with IRIS, the general idea remains that an IDP household A is less vulnerable than another household if household A has overcome all vulnerabilities that the comparison household has overcome. The empirical cumulative distribution then describes the share of households that are more vulnerable than a given household. For example (also conceptually displayed in Figure \ref{fig:ecdf}), imagine that a household A has overcome 7 of the 10 displacement-related vulnerabilities. Given the distribution of vulnerabilities in the comparator population, we can say that around 82.5% of the comparator households have not yet overcome all of these vulnerabilities. The 82.5% are hence the share of host community households that are more vulnerable than the example IDP household A. This share also describes the probability that the IDP household A is less vulnerable than a "randomly selected" or average household. In the example, the probability that household A is less vulnerable than the average comparator household is high, with over 80%.
While previous metric options provide a clear-cut decision whether a specific household exits the stock or not, this metric generates a probability that a given IDP household is less vulnerable than the average comparator household. To then calculate exits from the stock for aggregated statistics, one can calculate the average (weighted) probability of overcoming displacement-related vulnerabilities across all IDP households in comparison to the host community. Although conceptually more complex, the metric option incorporates the logic of IRIS by comparing IDPs to the comparator population while the requirement of achieving a pass on all indicators is implicitly incorporated into the definition of the multivariate cumulative distribution function.
\bigskip
```{r ecdf, fig.cap="Empirical cumulative distribution", fig.width=7, fig.height=4, out.width="60%"}
# Conceptual illustration only: synthetic data standing in for the number of
# overcome vulnerabilities (0-10) among 200 comparator households.
set.seed(1234)
minVal <- 0
maxVal <- 10
mn <- (maxVal - minVal) / 2
# Generate numbers (mostly) from min to max
x <- rnorm(200, mean = mn, sd = mn / 3)
# Clamp the out-of-bounds draws to [minVal, maxVal], then round to whole
# counts of overcome vulnerabilities
x <- pmin(maxVal, pmax(minVal, x))
x <- data.frame(x = round(x))
library(ggplot2)
# The reference lines and labels are single annotations, so they are added
# with constant arguments / annotate() rather than through aes(): a layer
# with an aes() mapping inherits the 200-row data and would draw the same
# line or label once per row, on top of itself.
ggplot(x, aes(x)) +
  stat_ecdf(geom = "step") +
  xlab("Overcome vulnerabilities") +
  ylab("Share of comparator households \n Cumulative probability") +
  # Slightly left of 7 so the line sits just before the step at x = 7
  geom_vline(xintercept = 6.99, color = "dodgerblue", size = 1) +
  annotate(
    "text",
    x = 8, y = 0.5,
    label = "Household A \nhas overcome \n7 vulnerabilities",
    color = "dodgerblue"
  ) +
  geom_hline(yintercept = 0.825, color = "dodgerblue", linetype = 2, size = 1) +
  annotate(
    "text",
    x = 2.1, y = 0.73,
    label = "82.5% of comparator \nhouseholds have not \n overcome vulnerabilities",
    color = "dodgerblue"
  ) +
  theme_bw()
```
\noindent Box \ref{box:box2} summarizes the main advantages and disadvantages of the different metrics options. The paper going forward aims to provide empirical information on how these metrics perform (that is, how many IDPs they count as having overcome their displacement-related vulnerabilities).
\begin{story}[htpb!]
\framedbox{
\begin{minipage}{0.2\textwidth}
\footnotesize
1. Composite metrics \\
across all criteria \\
2. Composite metrics at \\
the criterion level \\
3. Composite metrics at \\
the sub-criterion level \\~\\
4. Homogeneous cells \\~\\
5. Classifier/ \\
regression-based \\~\\
6. Empirical cumulative \\
distribution \\
\end{minipage}%
\hfill
\begin{minipage}{0.72\textwidth}
\footnotesize
\begin{tabular}{|p{\textwidth}}
\textcolor{red}{X} Ignores principle that a pass is required in each sub-criterion\\
\checkmark Low amount of indicators required\\~\\
\textcolor{orange}{O} Relaxes principle that a pass is required to the criterion level\\
\textcolor{orange}{O} Medium amount of indicators required \\~\\
\checkmark Complies with principle that a pass is required in each sub-criterion \\
\textcolor{red}{X} Very high amount of indicators required\\~\\
\checkmark Complies with principle that a pass is required in each sub-criterion \\
\checkmark Low amount of indicators required\\
\textcolor{red}{X} Adds methodological complexity to define homogeneous cells\\~\\
\textcolor{red}{X} Ignores principle that a pass is required in each sub-criterion \\
\checkmark Low amount of indicators required\\
\textcolor{red}{X} Analysis requires familiarity with regression-based analysis\\~\\
\checkmark Complies with principle that a pass is required in each sub-criterion \\
\textcolor{orange}{O} Implements probability rather than strict decision\\
\textcolor{red}{X} Complexity, requires familiarity with eCDF \\
\end{tabular}
\end{minipage}
}
\caption[]{Main advantages and disadvantages of different metrics}
\label{box:box2}
\end{story}
\newpage
## Simulation results for Hargeisa (UNHCR 2015)
```{r simulation_results_hargeisa}
# Mean number of IDP households exiting the stock, pooled over the simulation
# results of metric options 1-6. The vectors are concatenated before
# averaging, so every simulation run carries the same weight.
DS_mean_hargeisa <- mean(c(
  DS_Option1_hargeisa$DS,
  DS_Option2_hargeisa$DS,
  DS_Option3_hargeisa$DS,
  DS_Option4_hargeisa$DS,
  DS_Option5_hargeisa$DS,
  DS_Option6_hargeisa$DS
))

# Same pooled mean for the DS_perc column (multiplied by 100 where reported).
DS_perc_mean_hargeisa <- mean(c(
  DS_Option1_hargeisa$DS_perc,
  DS_Option2_hargeisa$DS_perc,
  DS_Option3_hargeisa$DS_perc,
  DS_Option4_hargeisa$DS_perc,
  DS_Option5_hargeisa$DS_perc,
  DS_Option6_hargeisa$DS_perc
))

# Indicator combinations, one combination per row.
combinations_hargeisa <- data_hargeisa %>%
  extract_indicators() %>%
  generate_combinations()

# Rows with ID == 1 (the IDP households, per the surrounding text).
hargeisa_idps <- data_hargeisa %>% filter(ID == 1)

# For every indicator combination: share of IDP households with a missing
# value on at least one of the selected indicators.
# seq_len() instead of 1:nrow() so an empty combination table yields an empty
# result rather than iterating over c(1, 0).
missing_har <- seq_len(nrow(combinations_hargeisa)) %>%
  map_dbl(function(i) {
    indicator_cols <- as.character(combinations_hargeisa[i, ])
    selected <- select(hargeisa_idps, all_of(indicator_cols))
    mean(!complete.cases(selected))
  }) %>%
  enframe(name = NULL, value = "missing")
# Regress the simulated exit share (in percent) on the indicator columns.
#
# data: data frame with a DS_perc column and columns whose names start with
#   "I"; an "iteration" column, if present, is ignored.
# Returns the broom::tidy() coefficient table of the fitted lm(), including
#   confidence intervals.
run_OLS <- function(data) {
  # Keep the "I..." columns (starts_with() ignores case by default, hence the
  # explicit removal of "iteration"), drop columns that hold a single
  # repeated value, then add the outcome back.
  is_constant <- function(column) all(column == first(column))
  model_data <- select(
    data,
    starts_with("I"),
    -any_of("iteration"),
    -where(is_constant),
    DS_perc
  )

  fit <- lm(DS_perc * 100 ~ ., model_data)
  broom::tidy(fit, conf.int = TRUE)
}
```
The IDP profiling in Hargeisa covered a total of `r sum(data_hargeisa$ID)` IDP households that could be compared directly to their hosts. In the following sections, we simulate how many IDP households would exit the stock if we apply the different metric options 1-6 on different indicators of vulnerability. As a reference, a pass/fail measure that does not compare IDPs with comparator households exits on average `r round(mean(DS_Original_hargeisa$DS),2)` IDP households from the stock in Hargeisa. On average across all simulated indicators and 6 metric options, `r round(DS_mean_hargeisa,2)` IDP households overcome their vulnerabilities and exit the IDP stock. This corresponds to an average of `r round(DS_perc_mean_hargeisa*100,2)`% of the sampled IDP population in Hargeisa. The low exit numbers are largely a result of the fact that IDP households could not be assessed as the IRIS recommends in paragraph 168 that no assessment should take place if data is missing on at least one of the 10 sub-criteria (on average this applies to `r round(mean(missing_har$missing)*100,2)`% of households across the indicator combinations). This note will discuss the challenge of missing data in subsequent sections.
Table \ref{tab:HAR_metric} summarizes the key findings related to the range of IDP households that exit the stock across all metrics (how much variation there is in results), and how important the choice of individual indicators is in each approach. The table also provides the mean number of IDP households exiting the stock. Overall, very few exits from the stock are possible in the Hargeisa context. As elaborated later, this is to some extent an effect of data missingness and the available indicators for *4.1 Property restitution and compensation*, which are hard to pass for IDP households. It should be noted that a composite measure at the sub-criterion level yields no exits from the stock, independent of the chosen indicators. A full composite index across all 10 sub-criteria and a regression-based approach generate the highest possible exits from the IDP stock, dependent on the indicator set chosen.
```{r Har_metric}
# Row labels of the summary table; order must match data_str below.
options <- c(
  "Pass/fail measure (no comparison!)", "1: Full composite",
  "2: Composite at criterion level", "3: Composite at sub-criterion level",
  "4: Comparison of homogeneous cells", "5: Classifier/ regression-based",
  "6: Empirical cumulative distribution"
)
# Names of the simulation result objects, looked up with get() below.
data_str <- c("DS_Original_hargeisa", paste0("DS_Option", 1:6, "_hargeisa"))
columns <- c(
  " ", "Mean of IDPs exiting the stock",
  "Range of IDPs exiting the stock",
  "Variation in how many IDPs exit the stock"
)

# One row per metric: mean, range and standard deviation of the simulated
# exits (DS) plus the same mean/range for DS_perc in percent.
# vapply() pins the type of each summary so an unexpected result shape fails
# loudly instead of silently changing the column type.
HAR_metric <- data.frame(
  Var1 = options,
  mean = vapply(
    data_str, function(x) round(mean(get(x)$DS), 2), numeric(1)
  ),
  mean_p = vapply(
    data_str, function(x) round(mean(get(x)$DS_perc * 100), 2), numeric(1)
  ),
  range = vapply(
    data_str,
    function(x) paste(round(range(get(x)$DS)), collapse = " to "),
    character(1)
  ),
  range_p = vapply(
    data_str,
    function(x) paste(round(range(get(x)$DS_perc * 100), 2), collapse = " to "),
    character(1)
  ),
  var = vapply(data_str, function(x) round(sd(get(x)$DS), 2), numeric(1))
) %>%
  mutate(
    range = paste0(range, " IDPs (", range_p, "% of stock)"),
    mean = paste0(mean, " IDPs (", mean_p, "% of stock)"),
    # Qualitative labels are assigned by hand, one per entry of data_str in
    # order; they are not derived from the SD value itself.
    # The second label previously read "Low (SD " without the colon.
    var = paste0(
      c(
        "Very low (SD: ", "Low (SD: ", "Very low (SD: ",
        "No variation (SD: ", "Very low (SD: ", "Low (SD: ", "Low (SD: "
      ),
      var, ")"
    )
  ) %>%
  select(-range_p, -mean_p)

# get row and colnames in order
rownames(HAR_metric) <- NULL
colnames(HAR_metric) <- columns

# NOTE(review): the sample size in the caption is hard-coded, whereas the
# running text computes it as sum(data_hargeisa$ID) -- confirm they agree.
knitr::kable(HAR_metric, "latex",
  booktabs = TRUE, linesep = "",
  caption = "\\label{tab:HAR_metric}Simulation results for Hargeisa across all metrics (Total sample of IDPs: 939 households)"
) %>%
  kable_styling(latex_options = "hold_position", font_size = 10) %>%
  column_spec(1, width = "11em") %>%
  column_spec(2:4, width = "9.5em")
```
Figure \ref{fig:density_hargeisa} depicts the distribution of the simulation results. The figure displays the density of the conducted simulations for the 6 identified metric options. To allow comparisons, a simple pass/fail measure on the sub-criterion level has been added but readers should note that this does not enable any comparisons with hosts. The graph displays on the x-axis what percentage of the IDP population in the dataset has overcome vulnerabilities and exits the stock. The y-axis displays the density of simulations or how many of the simulations shared the same outcome (number of IDPs exiting the stock). Note that the y-axis and the x-axis in the different graphs have their own scaling to make visualization easier.
```{r density_plots_hargeisa, fig.cap="\\label{fig:density_hargeisa}Density of simulations for all possible metrics (Hargeisa)", fig.width=7,fig.height=5.5, out.width="80%"}
# Density plot of the simulated share of IDPs exiting the stock.
#
# data:   data frame with a column `DS_perc` (plotted on the x-axis and
#         labelled as a percentage)
# title:  plot title
# limits: x-axis limits handed to scale_x_continuous(). The default is the
#         fixed range used for most panels (the lower bound sits slightly
#         below zero, presumably so that results at exactly 0 are not cut
#         off). Pass NULL to let ggplot2 derive the limits from the data.
# Returns a ggplot object showing the density with a dashed vertical line at
# mean(DS_perc).
plot_density <- function(data, title, limits = c(-0.001, 0.8)) {
  ggplot(data) +
    geom_density(aes(x = DS_perc), fill = "dodgerblue", alpha = 0.5) +
    geom_vline(aes(xintercept = mean(DS_perc)),
               linetype = "dashed") +
    scale_x_continuous(labels = scales::label_percent(), limits = limits) +
    labs(x = "Simulated proportion overcoming vulnerabilities",
         y = "Simulation density",
         title = title) +
    theme_bw() +
    theme(title = element_text(size = 8), text = element_text(size = 8),
          axis.title = element_text(size = 8))
}

# Variant of plot_density() without fixed x-axis limits: the axis range is
# left to ggplot2. Kept as a separate function so existing calls keep working.
plot_density_no_variation <- function(data, title) {
  plot_density(data, title, limits = NULL)
}
# Arrange the seven density panels (pass/fail reference plus options 1-6) in
# four rows of two: `+` puts two plots side by side and `/` stacks the rows
# (these look like the patchwork operators -- the package is loaded outside
# this chunk). plot_spacer() fills the empty cell in the last row.
# Option 3 is drawn with plot_density_no_variation(), i.e. without the fixed
# x-axis limits that plot_density() applies.
(plot_density(DS_Original_hargeisa, "Pass/fail measure") +
plot_density(DS_Option1_hargeisa,"1: Full composite"))/
(plot_density(DS_Option2_hargeisa,"2: Composite at criterion level") +
plot_density_no_variation(DS_Option3_hargeisa, "3: Composite at sub-criterion level"))/
(plot_density(DS_Option4_hargeisa, "4: Comparison of homogeneous cells") +
plot_density(DS_Option5_hargeisa, "5: Classifier/regression-based"))/
(plot_density(DS_Option6_hargeisa, "6: Empirical cumulative distribution")+plot_spacer())
```
These density plots corroborate what was discussed above. Most simulations - independent of the metric or indicators chosen - result in 0 IDP households overcoming their vulnerabilities and exiting the stock. Because of their high threshold for exiting IDPs from the stock, the metric options effectively behave like the pass/fail measure that does not compare IDPs with a comparator population. The regression-based assessment (option 5) of whether a specific household is predicted to be an IDP household or a host community household is the option with the highest variability: depending on the indicator combination chosen, the resulting number of IDPs exiting the stock can be higher or lower, but the difference is marginal in the case of the Hargeisa data.
The finding that very few IDP households can exit the stock can be traced back to three challenges that will be discussed in later sections: *Data missingness* for individuals and households on relevant indicators can lead to an inability to assess large proportions of the IDP population because IRIS recommends that no assessment should take place and they should remain in the stock. Second, the *aggregation of individual-level data* to the household level (e.g., employment) can exacerbate the problem of missing data and creates ambiguity as to how to do the aggregation in a reasonable way. Third, the assessment of all 10 sub-criteria and the comparison to hosts is a *very high benchmark* to pass for IDPs in Hargeisa, in particular for the sub-criterion *4.1 Property restitution and compensation*.
## Simulation results for Nigeria (World Bank 2018)
```{r simulation_results_nigeria}
# Headline numbers quoted in the Nigeria section.

# Mean number of exiting IDPs, pooled over every simulation of options 1-6
# (the pass/fail reference is not part of this average).
DS_mean_nigeria <- mean(c(DS_Option1_nigeria$DS,
                          DS_Option2_nigeria$DS,
                          DS_Option3_nigeria$DS,
                          DS_Option4_nigeria$DS,
                          DS_Option5_nigeria$DS,
                          DS_Option6_nigeria$DS))
# Same pooling for the share of the stock that exits.
DS_perc_mean_nigeria <- mean(c(DS_Option1_nigeria$DS_perc,
                               DS_Option2_nigeria$DS_perc,
                               DS_Option3_nigeria$DS_perc,
                               DS_Option4_nigeria$DS_perc,
                               DS_Option5_nigeria$DS_perc,
                               DS_Option6_nigeria$DS_perc))
# Indicator combinations; both helpers are defined outside this chunk.
combinations_nigeria <- data_nigeria %>%
  extract_indicators() %>%
  generate_combinations()
# Keep the rows flagged ID == 1 (the IDP households).
nigeria_idps <- data_nigeria %>% filter(ID == 1)
# For every indicator combination (one row of `combinations_nigeria`): the
# share of IDP rows with a missing value on at least one selected indicator.
# seq_len() keeps the loop empty when there are no combinations, and all_of()
# makes the column selection by name explicit.
# NOTE(review): this is an unweighted row share although the data carries
# survey weights (`WT`) -- confirm that this is intended.
missing_nig <- seq_len(nrow(combinations_nigeria)) %>%
  map_dbl(function(i) {
    indicator_cols <- as.character(combinations_nigeria[i, ])
    mean(!complete.cases(select(nigeria_idps, all_of(indicator_cols))))
  }) %>%
  enframe(name = NULL, value = "missing")
```
The IDP profiling in Nigeria covered a total of `r sum(data_nigeria$ID)` IDP households. Because the dataset includes survey weights, these IDPs represent an overall amount of over `r round(sum(nigeria_idps$WT)/1000,2)` thousand IDP households. On average across all simulated indicators and metrics, `r round(DS_mean_nigeria/1000,2)` thousand IDP households exit the IDP stock, which corresponds to an average of `r round(DS_perc_mean_nigeria*100,2)`% of the IDPs represented by the profiling. For an average of `r round(mean(missing_nig$missing)*100,2)`% of the IDP households represented by the profiling in Nigeria, IDP households could not be assessed due to missing data on at least one dimension.
In Nigeria, the different metric options produce varying estimates for the exit from the IDP stock. Using a regression-based approach or one composite metric across all criteria produces the highest number of exits from the IDP stock; these two options are also the most sensitive to the indicators used, while other approaches show little variation. Assessing exits from the IDP stock through the empirical cumulative distribution also generates a higher exit from the IDP stock. Table \ref{tab:NIG_metric} summarizes the key findings related to variation across indicators and metrics. Most importantly, we find very little variation in the number of IDPs exiting the stock for the composite measure at the sub-criterion level and for homogeneous cells.
```{r Nig_metric}
# Summary table for Nigeria, built like the Hargeisa one: one row per metric
# (pass/fail reference plus options 1-6).
data_str_n <- c("DS_Original_nigeria", paste0("DS_Option", 1:6, "_nigeria"))
# Look up each simulation result object once, in `data_str_n` order.
sims_n <- lapply(data_str_n, function(obj_name) get(obj_name))
# Per-metric summaries; exits are rounded to whole numbers for the mean.
NIG_metric <- data.frame(
  Var1 = options,
  mean = vapply(sims_n, function(s) round(mean(s$DS)), numeric(1)),
  mean_p = vapply(sims_n, function(s) round(mean(s$DS_perc * 100), 2),
                  numeric(1)),
  range = vapply(sims_n,
                 function(s) paste(round(range(s$DS)), collapse = " to "),
                 character(1)),
  range_p = vapply(sims_n,
                   function(s) {
                     paste(round(range(s$DS_perc * 100), 2), collapse = " to ")
                   },
                   character(1)),
  var = vapply(sims_n, function(s) round(sd(s$DS), 2), numeric(1))
)
# Merge counts and percentages into display strings, then drop the helper
# columns. The variability labels are hand-assigned, one per row.
NIG_metric <- NIG_metric %>%
  mutate(
    range = paste0(range, " IDPs (", range_p, "% of stock)"),
    mean = paste0(mean, " IDPs (", mean_p, "% of stock)"),
    var = paste0(
      c("Low (SD: ", "High (SD: ", "Low (SD: ",
        "Very low (SD: ", "Very low (SD: ", "Very high (SD: ", "High (SD: "),
      var, ")"
    )
  ) %>%
  select(-range_p, -mean_p)
# Reset row names and apply the shared column headers.
rownames(NIG_metric) <- NULL
colnames(NIG_metric) <- columns
knitr::kable(
  NIG_metric, "latex",
  booktabs = TRUE, linesep = "",
  caption = "\\label{tab:NIG_metric}Simulation results for Nigeria across all metrics (Weighted IDP households: 129.41 thousand)"
) %>%
  kable_styling(latex_options = "hold_position", font_size = 10) %>%
  column_spec(1, width = "11em") %>%
  column_spec(2:4, width = "9.5em")
```
\noindent Regarding metric option 4, different groupings into cells in Nigeria were tried based on date of arrival, date of displacement, and the origin and displacement location of IDPs. The simulations for both Hargeisa and Nigeria show that the results are not very sensitive to the choice of variables used to group the IDP population into homogeneous cells for the group-level assessment against the comparator. Details on the different groupings of IDPs into cells can be found in Box \ref{box:box_cells}.
Figure \ref{fig:density_nigeria} corroborates the above findings and displays the density of simulation outcomes for the 6 different metric options that could be used to implement the IRIS solutions measure in practice. The options are compared to a simple pass/fail measure which does not implement a comparison to host communities. The composite measure at the criterion level, the composite measure at the sub-criterion level and a comparison of homogeneous cells yield an average number of IDPs exiting the stock close to 0. These three metric options essentially generate similar results to a simple pass/fail measure without comparisons to hosts. In the case of the full composite measure (option 1), the density plots show higher variability and up to `r round(mean(DS_Option1_nigeria$DS_perc)*100,2)` percent of the IDP population assessed in Nigeria may have overcome their displacement-related vulnerabilities according to this measure. For a regression-based approach or the cumulative distribution, the assessment yielded a bimodal distribution.
```{r density_plots_nigeria, fig.cap="\\label{fig:density_nigeria}Density of simulations for all possible metrics (Nigeria)", fig.width=7,fig.height=5.5, out.width="80%"}
# Arrange the seven density panels (pass/fail reference plus options 1-6) in
# four rows of two: `+` puts two plots side by side and `/` stacks the rows
# (these look like the patchwork operators -- the package is loaded outside
# this chunk). plot_spacer() fills the empty cell in the last row.
# Every panel, including option 3, is drawn with plot_density() here.
(plot_density(DS_Original_nigeria, "Pass/fail measure") +
plot_density(DS_Option1_nigeria,"1: Full composite"))/
(plot_density(DS_Option2_nigeria,"2: Composite at criterion level") +
plot_density(DS_Option3_nigeria, "3: Composite at sub-criterion level"))/
(plot_density(DS_Option4_nigeria, "4: Comparison of homogeneous cells") +
plot_density(DS_Option5_nigeria, "5: Classifier/regression-based"))/
(plot_density(DS_Option6_nigeria, "6: Empirical cumulative distribution")+plot_spacer())
```
\begin{story}[htbp!]
\framedbox{
\footnotesize
\textbf{How should the IDP population be split into homogeneous cells?}
\bigskip
To fully assess the suitability of the homogeneous cell approach, we have to assess different ways in which the IDP population can be grouped in cells as the size of cells, the homogeneity within IDP cells and the heterogeneity across IDP cells could affect how many IDP households exit the stock.
\bigskip
\textbf{Approach I: Grouping variables in the data to split the IDP population:} First, we selected three grouping variables from each of the datasets based on household characteristics - for example the gender of the household head, the location of origin, or the time of displacement - and then we grouped the IDPs into subgroups based on these variables to then calculate the average scores that can be compared to the host community. In the simulations, we can do this iteratively for multiple possible grouping variables (see Annex I for details) in all four displacement contexts. The results suggest:
\begin{itemize}
\item \textbf{The grouping of the IDP population into cells did not strongly affect how many IDPs exit the stock}.
\item The reason for this is that \textbf{achieving a pass on all group averages for all sub-criteria is a hard benchmark to meet}.
\item More fine-grained groupings (e.g. by clan, by departure period, and by district) can potentially lead to more exits from the stock as some small groups might outperform the host population while this is unlikely for bigger groups (e.g. by gender or region of origin).
\item It is recommended that grouping variables yield similarly sized groups.
\end{itemize}
\bigskip
Example differences across groupings in Nigeria:
\begin{center}
\scalebox{0.95}{
\scriptsize
\begin{tabular}[t]{lll>{\raggedleft\arraybackslash}p{6.7em}>{\raggedleft\arraybackslash}p{6.7em}>{\raggedright\arraybackslash}p{8em}}
\toprule
Group variable 1 & Group variable 2 & Group variable 3 & Average number of IDPs exiting the stock & Average percentage of IDPs exiting the stock & Range and mean of group size\\
\midrule
Year of arrival & Region of origin & Region of displacement & 43.99 & 0.03 & 3 to 46270 (\textasciitilde{} 1221 )\\
Year of displacement & Year of arrival & Region of origin & 72.29 & 0.06 & 12 to 45057 (\textasciitilde{} 1294 )\\
Year of displacement & Year of arrival & Region of displacement & 73.37 & 0.06 & 3 to 49797 (\textasciitilde{} 1407 )\\
Year of displacement & Region of origin & Region of displacement & 49.59 & 0.04 & 3 to 43390 (\textasciitilde{} 1362 )\\
\midrule
Algorithmic clustering & & & 328.35 & 0.25 & \\
\bottomrule
\end{tabular}
}
\end{center}
\bigskip
\footnotesize
\textbf{Approach II: Algorithmic clustering:} Second, we employed automatic hierarchical clustering algorithms to group the IDPs into groups based on their performance on the different indicators. An algorithm identifies IDP households in the data that are similar to each other in the sense that they perform equally well on certain sub-criteria while equally bad on others. These automatic data-driven groupings yield IDP cells that are most homogeneous within the cells and most heterogeneous in comparison to other cells. The results suggest:
\begin{itemize}
\item \textbf{A data-driven cell approach could potentially increase the exit from the IDP stock}. Similarities between IDP households are exploited and the "best-performing" households are grouped together and taken out of the stock.
\item While there are easily applicable software packages for hierarchical clustering, \textbf{further exploration is needed if National Statistical Offices can implement such approaches}.
\end{itemize}
\bigskip
In the example case of Nigeria, algorithmic clustering leads to an average of 328.35 IDPs exiting the stock (0.25\%). This is higher than the mean exit from the IDP stock when using core demographics as groupings (only around `r round(mean(DS_Option4_nigeria$DS),2)` IDPs or `r round(mean(DS_Option4_nigeria$DS_perc)*100,2)`\% of the stock exit). \\
}
\caption[]{Identifying IDP subgroups to implement the homogeneous cell approach}
\label{box:box_cells}
\end{story}
```{r cells_hargeisa, eval = F}
# Do not run as already printed above
# Describe the sizes of the subgroups formed by one set of grouping variables.
#
# data:  data frame with an `ID` column (only rows with ID == 1 are used), a
#        weight column `WT`, and the grouping columns named in `cells`
# cells: table whose row `x` holds the names of the grouping columns
# x:     row index into `cells`
# Returns a single string "<min> to <max>  (~ <mean> )", where the group size
# is the sum of `WT` within each group (missing weights ignored) and all
# three numbers are rounded to whole numbers.
count_groupsize <- function(data, cells, x) {
  group_cols <- as.character(cells[x, ])
  sizes <- data %>%
    filter(ID == 1) %>%
    # all_of() makes it explicit that the columns are selected by name
    group_by(across(all_of(group_cols))) %>%
    summarise(group_size = sum(WT, na.rm = TRUE), .groups = "drop")
  paste(
    paste(c(round(min(sizes$group_size)), round(max(sizes$group_size))),
          collapse = " to "),
    " (~", round(mean(sizes$group_size)), ")")
}
# Group-size description for each of the 10 grouping-variable combinations in
# `cells_hargeisa`. WT is set to 1 for every row, so the group sizes are plain
# household counts.
hargeisa_groups <- vapply(
  1:10,
  function(x) {
    count_groupsize(x, data = data_hargeisa %>% mutate(WT = 1),
                    cells = cells_hargeisa)
  },
  character(1)
)
# Average exits per grouping combination, with readable labels for the
# grouping variables and the group-size description appended as last column.
# NOTE(review): cbind() pairs the rows by position -- this presumes the
# summarised combinations come out in the same order as rows 1:10 of
# `cells_hargeisa`; confirm. The factor labels likewise rely on the sort order
# of the cell_* values.
DS_Option4_hargeisa %>%
  group_by(cell_1, cell_2, cell_3) %>%
  summarise("Average number of IDPs exiting the stock" = round(mean(DS), 2),
            "Average percentage of IDPs exiting the stock" = round(mean(DS_perc) * 100, 2),
            .groups = "drop") %>%
  mutate(cell_1 = factor(cell_1, labels = c("Region of origin", "Gender", "Origin district")),
         cell_2 = factor(cell_2, labels = c("Clan", "Gender", "Origin district")),
         cell_3 = factor(cell_3, labels = c("Clan", "Departure period", "Origin district"))) %>%
  rename("Group variable 1" = cell_1, "Group variable 2" = cell_2, "Group variable 3" = cell_3) %>%
  # check.names = FALSE keeps the header with spaces; otherwise data.frame()
  # turns it into "Range.and.mean.of.group.size".
  cbind(., data.frame("Range and mean of group size" = hargeisa_groups,
                      check.names = FALSE)) %>%
  kable(., "latex", booktabs = TRUE, linesep = "", caption = "Effect of different grouping variables on overall exit from the IDP stock in Hargeisa") %>%
  kable_styling(latex_options = "hold_position", font_size = 9) %>%
  column_spec(4:5, width = "6.7em") %>%
  column_spec(6, width = "8em")
```
```{r cells_nigeria, eval = F}
# Do not run as already printed above
# Group-size description for each of the 4 grouping-variable combinations in
# `cells_nigeria`; group sizes are sums of the `WT` column of `data_nigeria`.
nigeria_groups <- vapply(
  1:4,
  function(x) count_groupsize(x, data = data_nigeria, cells = cells_nigeria),
  character(1)
)
# Average exits per grouping combination, with readable labels for the
# grouping variables and the group-size description appended as last column.
# NOTE(review): cbind() pairs the rows by position -- this presumes the
# summarised combinations come out in the same order as rows 1:4 of
# `cells_nigeria`; confirm. The factor labels likewise rely on the sort order
# of the cell_* values.
DS_Option4_nigeria %>%
  group_by(cell_1, cell_2, cell_3) %>%
  summarise("Average number of IDPs exiting the stock" = round(mean(DS), 2),
            "Average percentage of IDPs exiting the stock" = round(mean(DS_perc) * 100, 2),
            .groups = "drop") %>%
  mutate(cell_1 = factor(cell_1, labels = c("Year of arrival", "Year of displacement")),
         cell_2 = factor(cell_2, labels = c("Year of arrival", "Region of origin")),
         cell_3 = factor(cell_3, labels = c("Region of origin", "Region of displacement"))) %>%
  rename("Group variable 1" = cell_1, "Group variable 2" = cell_2, "Group variable 3" = cell_3) %>%
  # check.names = FALSE keeps the header with spaces; otherwise data.frame()
  # turns it into "Range.and.mean.of.group.size".
  cbind(., data.frame("Range and mean of group size" = nigeria_groups,
                      check.names = FALSE)) %>%
  # Caption names Nigeria (it previously said Hargeisa, copied from the
  # Hargeisa table).
  kable(., "latex", booktabs = TRUE, linesep = "", caption = "Effect of different grouping variables on overall exit from the IDP stock in Nigeria") %>%
  kable_styling(latex_options = "hold_position", font_size = 9) %>%
  column_spec(1:3, width = "8em") %>%
  column_spec(4:5, width = "7em") %>%
  column_spec(6, width = "7em")