forked from schelterlabs/uva-bigdata-course-2021-students
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMapReduceEngine.java
89 lines (65 loc) · 2.53 KB
/
MapReduceEngine.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
package nl.uva.bigdata.hadoop.assignment2;
import java.util.*;
/**
 * A single key/value pair flowing through the MapReduce pipeline.
 *
 * @param <K> type of the key; must be comparable with itself
 * @param <V> type of the value
 */
class Record<K extends Comparable<K>, V> {

    K key;
    V value;

    /**
     * Creates a record holding the given key and value.
     *
     * @param key   the key of this record
     * @param value the value of this record
     */
    public Record(K key, V value) {
        this.value = value;
        this.key = key;
    }

    /** @return the value stored in this record */
    public V getValue() {
        return this.value;
    }

    /** @return the key stored in this record */
    public K getKey() {
        return this.key;
    }
}
/**
 * User-supplied map step: turns one input record into a collection of output records.
 *
 * @param <K1> key type of the input record
 * @param <V1> value type of the input record
 * @param <K2> key type of the emitted records
 * @param <V2> value type of the emitted records
 */
interface MapFunction<K1 extends Comparable<K1>, V1, K2 extends Comparable<K2>, V2> {
/**
 * Processes a single input record.
 *
 * @param inputRecord the record to process
 * @return the records emitted for {@code inputRecord}
 */
Collection<Record<K2, V2>> map(Record<K1, V1> inputRecord);
}
/**
 * User-supplied reduce step: turns a key and its group of values into a collection of output records.
 *
 * @param <K2> key type of both the reducer input and the emitted records
 * @param <V2> type of the values in the input group
 * @param <V3> value type of the emitted records
 */
interface ReduceFunction<K2 extends Comparable<K2>, V2, V3> {
/**
 * Processes one key together with the values grouped under it.
 *
 * @param key        the key of the group
 * @param valueGroup the values associated with {@code key}
 * @return the records emitted for this group
 */
Collection<Record<K2, V3>> reduce(K2 key, Collection<V2> valueGroup);
}
/**
 * A minimal, single-threaded, in-memory MapReduce engine.
 *
 * <p>A computation runs in four steps: the map function is applied to every input record,
 * the map outputs are hash-partitioned by key, each partition is grouped by key, and the
 * reduce function is applied to every key group of every partition.
 *
 * @param <K1> key type of the input records
 * @param <V1> value type of the input records
 * @param <K2> key type of the intermediate (map output) and final records
 * @param <V2> value type of the intermediate (map output) records
 * @param <V3> value type of the final (reduce output) records
 */
public class MapReduceEngine<K1 extends Comparable<K1>, V1, K2 extends Comparable<K2>, V2, V3> {

    /**
     * Applies the map function to every input record and concatenates everything it emits.
     *
     * @param inputRecords the records to feed into the map function
     * @param map          the map function
     * @return all emitted records, in the iteration order of {@code inputRecords}
     */
    private Collection<Record<K2, V2>> runMapPhase(
            Collection<Record<K1, V1>> inputRecords,
            MapFunction<K1, V1, K2, V2> map
    ) {
        List<Record<K2, V2>> mapOutputs = new ArrayList<>();
        for (Record<K1, V1> inputRecord : inputRecords) {
            Collection<Record<K2, V2>> emitted = map.map(inputRecord);
            // A mapper that emits nothing may return null instead of an empty collection.
            if (emitted != null) {
                mapOutputs.addAll(emitted);
            }
        }
        return mapOutputs;
    }

    /**
     * Distributes the map outputs over exactly {@code numPartitions} partitions by key hash,
     * so that all records with equal keys end up in the same partition.
     *
     * @param mapOutputs    the records emitted by the map phase
     * @param numPartitions the number of partitions to create; must be positive
     * @return {@code numPartitions} partitions (some possibly empty)
     * @throws NullPointerException if a record has a {@code null} key
     */
    private Collection<Collection<Record<K2, V2>>> partitionMapOutputs(
            Collection<Record<K2, V2>> mapOutputs,
            int numPartitions) {
        List<Collection<Record<K2, V2>>> partitions = new ArrayList<>(numPartitions);
        for (int i = 0; i < numPartitions; i++) {
            partitions.add(new ArrayList<Record<K2, V2>>());
        }
        for (Record<K2, V2> record : mapOutputs) {
            K2 key = Objects.requireNonNull(record.getKey(), "map output key must not be null");
            // Clear the sign bit so that negative hash codes still yield a valid index.
            int partitionIndex = (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
            partitions.get(partitionIndex).add(record);
        }
        return partitions;
    }

    /**
     * Groups the values of one partition by key.
     *
     * @param reducerInputPartition the records of a single partition
     * @return a map from each key to its values, iterated in the natural order of the keys;
     *         values keep the order in which they appear in the partition
     */
    private Map<K2, Collection<V2>> groupReducerInputPartition(Collection<Record<K2, V2>> reducerInputPartition) {
        // Sorted map: keys are Comparable, and a fixed key order makes the output deterministic.
        Map<K2, Collection<V2>> grouped = new TreeMap<>();
        for (Record<K2, V2> record : reducerInputPartition) {
            K2 key = record.getKey();
            Collection<V2> valueGroup = grouped.get(key);
            if (valueGroup == null) {
                valueGroup = new ArrayList<>();
                grouped.put(key, valueGroup);
            }
            valueGroup.add(record.getValue());
        }
        return grouped;
    }

    /**
     * Applies the reduce function to every key group of one partition.
     *
     * @param reducerInputs the grouped values of a single partition
     * @param reduce        the reduce function
     * @return all records emitted by the reduce function for this partition
     */
    private Collection<Record<K2, V3>> runReducePhaseOnPartition(
            Map<K2, Collection<V2>> reducerInputs,
            ReduceFunction<K2, V2, V3> reduce
    ) {
        List<Record<K2, V3>> reduceOutputs = new ArrayList<>();
        for (Map.Entry<K2, Collection<V2>> group : reducerInputs.entrySet()) {
            Collection<Record<K2, V3>> emitted = reduce.reduce(group.getKey(), group.getValue());
            // A reducer that emits nothing may return null instead of an empty collection.
            if (emitted != null) {
                reduceOutputs.addAll(emitted);
            }
        }
        return reduceOutputs;
    }

    /**
     * Runs a complete MapReduce computation over the given input.
     *
     * @param inputRecords               the input records
     * @param map                        the map function
     * @param reduce                     the reduce function
     * @param numPartitionsDuringShuffle the number of partitions to shuffle into; must be positive
     * @return the concatenated reduce outputs of all partitions
     * @throws NullPointerException     if {@code inputRecords}, {@code map} or {@code reduce} is {@code null}
     * @throws IllegalArgumentException if {@code numPartitionsDuringShuffle} is not positive
     */
    public Collection<Record<K2, V3>> compute(
            Collection<Record<K1, V1>> inputRecords,
            MapFunction<K1, V1, K2, V2> map,
            ReduceFunction<K2, V2, V3> reduce,
            int numPartitionsDuringShuffle
    ) {
        // Validate up front so that no user code runs before a bad argument is reported.
        Objects.requireNonNull(inputRecords, "inputRecords must not be null");
        Objects.requireNonNull(map, "map must not be null");
        Objects.requireNonNull(reduce, "reduce must not be null");
        if (numPartitionsDuringShuffle <= 0) {
            throw new IllegalArgumentException(
                    "numPartitionsDuringShuffle must be positive, but was " + numPartitionsDuringShuffle);
        }

        Collection<Record<K2, V2>> mapOutputs = runMapPhase(inputRecords, map);

        Collection<Collection<Record<K2, V2>>> partitionedMapOutput =
                partitionMapOutputs(mapOutputs, numPartitionsDuringShuffle);

        assert numPartitionsDuringShuffle == partitionedMapOutput.size();

        List<Record<K2, V3>> outputs = new ArrayList<>();

        for (Collection<Record<K2, V2>> partition : partitionedMapOutput) {
            Map<K2, Collection<V2>> reducerInputs = groupReducerInputPartition(partition);
            outputs.addAll(runReducePhaseOnPartition(reducerInputs, reduce));
        }

        return outputs;
    }
}