Skip to content

Commit

Permalink
apache#21 [euphoria-core] Javadocs for Repartition
Browse files Browse the repository at this point in the history
  • Loading branch information
Novotnik, Petr authored and David Moravek committed May 15, 2018
1 parent ccb0c34 commit cb8414a
Showing 1 changed file with 69 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,33 @@
import java.util.Objects;

/**
* Repartition input to some other number of partitions.
* Repartition the input dataset. Repartitioning allows 1) to redistribute
* a dataset's elements across their partitions and/or 2) to define the
* number of partitions of a dataset.
*
* Example:
*
* <pre>{@code
* Dataset<String> strings = ...;
* strings = Repartition
* .of(strings)
* .setNumPartitions(10)
* .setPartitioner(new HashPartitioner<>())
* .output();
* }</pre>
*
* Here, the input dataset is repartitioned into 10 partitions, distributing
* the elements based on their hash code as computed by {@code String#hashCode}.<p>
*
* {@code #setNumPartitions} is optional and will default to the number of
* partitions of the input dataset, effectively merely redistributing the
* dataset elements according to the specified partitioner.<p>
*
* Also {@code #setPartitioner} is optional, defaulting to
* {@link cz.seznam.euphoria.core.client.dataset.partitioning.HashPartitioner}.
*
* Note: as with all Euphoria operators, you must continue to use the
* repartition operator's output dataset to make the operation effective.
*/
@Basic(
state = StateComplexity.ZERO,
Expand All @@ -42,6 +68,15 @@ public static class OfBuilder {
this.name = name;
}

/**
 * Selects the dataset which the repartition operator under construction
 * is going to consume.
 *
 * @param <IN> the element type of the supplied dataset
 *
 * @param input the dataset to be repartitioned
 *
 * @return a follow-up builder, carrying this builder's name and the given
 *         input, for finishing the setup of the repartition operator
 */
public <IN> OutputBuilder<IN> of(Dataset<IN> input) {
  final OutputBuilder<IN> next = new OutputBuilder<>(this.name, input);
  return next;
}
Expand All @@ -61,6 +96,12 @@ public static class OutputBuilder<IN>
this.input = Objects.requireNonNull(input);
}

/**
* Finalizes the setup of the {@link Repartition} operator and retrieves
* the dataset representing the repartitioned input dataset.
*
* @return the dataset representing the repartitioned input
*/
@Override
public Dataset<IN> output() {
Flow flow = input.getFlow();
Expand All @@ -71,10 +112,30 @@ public Dataset<IN> output() {
}
}

/**
 * Begins the construction of a {@link Repartition} operator over the given
 * input dataset without asking the caller for a name; the operator is
 * registered under the default name {@code "Repartition"}.
 *
 * @param <IN> the element type of the supplied dataset
 *
 * @param input the dataset to be repartitioned
 *
 * @return a builder for finishing the setup of the {@link Repartition} operator
 *
 * @see #named(String)
 * @see OfBuilder#of(Dataset)
 */
public static <IN> OutputBuilder<IN> of(Dataset<IN> input) {
  final OutputBuilder<IN> builder = new OutputBuilder<>("Repartition", input);
  return builder;
}

/**
 * Begins the construction of a {@link Repartition} operator which carries
 * a caller supplied name.
 *
 * @param name the user provided name to give to the operator being built
 *
 * @return a builder, holding the given name, through which the input
 *         dataset of the new {@link Repartition} operator is specified
 */
public static OfBuilder named(String name) {
  final OfBuilder builder = new OfBuilder(name);
  return builder;
}
Expand All @@ -87,10 +148,14 @@ public static OfBuilder named(String name) {
this.partitioning = partitioning;
}

/**
 * Retrieves the partitioning information according to which this
 * operator's input dataset is to be redistributed.
 *
 * @return the partitioning this {@link Repartition} operator was
 *         constructed with
 */
@Override
public Partitioning<IN> getPartitioning() {
  return this.partitioning;
}


}
}

0 comments on commit cb8414a

Please sign in to comment.