more examples for classifier to distinguish mapreduce from spark properly
apache · Jun 2, 2015 · 63efda2 · 63efda2
1 parent aeb6bb6
commit 63efda2
Showing 1 changed file with 10 additions and 1 deletion.
diff --git a/examples/src/main/python/ml/cross_validator.py b/examples/src/main/python/ml/cross_validator.py
@@ -42,7 +42,16 @@
     training = sc.parallelize([(0, "a b c d e spark", 1.0),
                                (1, "b d", 0.0),
                                (2, "spark f g h", 1.0),
-                               (3, "hadoop mapreduce", 0.0)]) \
+                               (3, "hadoop mapreduce", 0.0),
+                               (4, "b spark who", 1.0),
+                               (5, "g d a y", 0.0),
+                               (6, "spark fly", 1.0),
+                               (7, "was mapreduce", 0.0),
+                               (8, "e spark program", 1.0),
+                               (9, "a e c l", 0.0),
+                               (10, "spark compile", 1.0),
+                               (11, "hadoop software", 0.0)
+                               ]) \
         .map(lambda x: LabeledDocument(*x)).toDF()
 
     # Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.