From 6941f1de64795fae017a14b1dd1aedfcd2d71869 Mon Sep 17 00:00:00 2001 From: andrewhn Date: Fri, 6 May 2016 01:46:16 +1000 Subject: [PATCH] add unicode data to tests (#432) * add unicode data to tests * make tests pass on 2.7 * clean up data loading - remove duplicate keys in slice_data - reduce line length * change manager option flag to -t, --load-test-data * test --> load_test_data --- caravel/bin/caravel | 10 ++- caravel/data/__init__.py | 89 ++++++++++++++++++++++- caravel/data/unicode_utf8_unixnl_test.csv | 42 +++++++++++ caravel/viz.py | 2 +- tests/core_tests.py | 3 +- 5 files changed, 140 insertions(+), 6 deletions(-) create mode 100644 caravel/data/unicode_utf8_unixnl_test.csv diff --git a/caravel/bin/caravel b/caravel/bin/caravel index 5d1e21504c097..5a5bc23e5eb8d 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -68,9 +68,9 @@ def version(): print(s) @manager.option( - '-s', '--sample', action='store_true', - help="Only load 1000 rows (faster, used for testing)") -def load_examples(sample): + '-t', '--load-test-data', action='store_true', + help="Load additional test data") +def load_examples(load_test_data): """Loads a set of Slices and Dashboards and a supporting dataset """ print("Loading examples into {}".format(db)) @@ -85,6 +85,10 @@ def load_examples(sample): print("Loading [Birth names]") data.load_birth_names() + if load_test_data: + print("Loading [Unicode test data]") + data.load_unicode_test_data() + @manager.command def refresh_druid(): """Refresh all druid datasources""" diff --git a/caravel/data/__init__.py b/caravel/data/__init__.py index 69ad02cb3d2f3..7538edc020a46 100644 --- a/caravel/data/__init__.py +++ b/caravel/data/__init__.py @@ -8,9 +8,11 @@ import json import os import textwrap +import datetime +import random import pandas as pd -from sqlalchemy import String, DateTime, Float +from sqlalchemy import String, DateTime, Date, Float from caravel import app, db, models, utils @@ -808,3 +810,88 @@ def load_birth_names(): dash.slices = slices[:-1] db.session.merge(dash) db.session.commit() + + +def load_unicode_test_data(): + """Loading unicode test dataset from a csv file in the repo""" + df = pd.read_csv(os.path.join(DATA_FOLDER, 'unicode_utf8_unixnl_test.csv'), + encoding="utf-8") + # generate date/numeric data + df['date'] = datetime.datetime.now().date() + df['value'] = [random.randint(1, 100) for _ in range(len(df))] + df.to_sql( + 'unicode_test', + db.engine, + if_exists='replace', + chunksize=500, + dtype={ + 'phrase': String(500), + 'short_phrase': String(10), + 'with_missing': String(100), + 'date': Date(), + 'value': Float(), + }, + index=False) + print("Done loading table!") + print("-" * 80) + + print("Creating table reference") + obj = db.session.query(TBL).filter_by(table_name='unicode_test').first() + if not obj: + obj = TBL(table_name='unicode_test') + obj.main_dttm_col = 'date' + obj.database = get_or_create_db(db.session) + obj.is_featured = False + db.session.merge(obj) + db.session.commit() + obj.fetch_metadata() + tbl = obj + + slice_data = { + "datasource_id": "3", + "datasource_name": "unicode_test", + "datasource_type": "table", + "flt_op_1": "in", + "granularity": "date", + "groupby": [], + "metric": 'sum__value', + "row_limit": config.get("ROW_LIMIT"), + "since": "100 years ago", + "until": "now", + "where": "", + "viz_type": "word_cloud", + "size_from": "10", + "series": "short_phrase", + "size_to": "70", + "rotation": "square", + "limit": "100", + } + + print("Creating a slice") + slc = Slice( + slice_name="Unicode Cloud", + viz_type='word_cloud', + datasource_type='table', + table=tbl, + params=get_slice_json(slice_data), + ) + merge_slice(slc) + + print("Creating a dashboard") + dash = db.session.query(Dash).filter_by(dashboard_title="Unicode Test").first() + + if not dash: + dash = Dash() + pos = { + "size_y": 4, + "size_x": 4, + "col": 1, + "row": 1, + "slice_id": slc.id, + } + dash.dashboard_title = "Unicode Test" + dash.position_json = json.dumps([pos], indent=4) + dash.slug = "unicode-test" + dash.slices = [slc] + db.session.merge(dash) + db.session.commit() diff --git a/caravel/data/unicode_utf8_unixnl_test.csv b/caravel/data/unicode_utf8_unixnl_test.csv new file mode 100644 index 0000000000000..9b0235b0ddd0b --- /dev/null +++ b/caravel/data/unicode_utf8_unixnl_test.csv @@ -0,0 +1,42 @@ +phrase,short_phrase,with_missing +"Под южно дърво, цъфтящо в синьо, бягаше малко пухкаво зайче.",Под южно д,Fam hx-cardiovas dis NEC +Příliš žluťoučký kůň úpěl ďábelské ódy.,Příliš žlu, +視野無限廣,窗外有藍天,視野無限廣,窗外有藍,Sparganosis +微風迎客,軟語伴茶,微風迎客,軟語伴茶,Var mgr NEC wo ntc mgr +中国智造,慧及全球,中国智造,慧及全球,Mech prob w internal org +"Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen Walther spillede på xylofon.",Quizdeltag,Corneal dystrophy NOS +Pa’s wijze lynx bezag vroom het fikse aquaduct.,Pa’s wijze,Edema in preg-unspec +Eĥoŝanĝo ĉiuĵaŭde.,Eĥoŝanĝo ĉ, +See väike mölder jõuab rongile hüpata,See väike ,Twin NOS-nonhosp +Viekas kettu punaturkki laiskan koiran takaa kurkki.,Viekas ket,Postgastric surgery synd +Voix ambiguë d’un cœur qui au zéphyr préfère les jattes de kiwis.,Voix ambig,Loose body-mult joints +Portez ce vieux whisky au juge blond qui fume.,Portez ce ,Late eff acc poisoning +Zwölf Boxkämpfer jagen Viktor quer über den großen Sylter Deich,Zwölf Boxk,Opn brain inj w/o coma +Franz jagt im komplett verwahrlosten Taxi quer durch Bayern.,Franz jagt,TB of ear-unspec +Θέλει αρετή και τόλμη η ελευθερία. (Ανδρέας Κάλβος),Θέλει αρετ,Chr peptic ulcer w perf +Ο καλύμνιος σφουγγαράς ψιθύρισε πως θα βουτήξει χωρίς να διστάζει.,Ο καλύμνιο,Cns TB NEC-cult dx +דג סקרן שט לו בים זך אך לפתע פגש חבורה נחמדה שצצה כך.,דג סקרן שט,Polyhydramnios-delivered +Árvíztűrő tükörfúrógép,Árvíztűrő ,Malign neopl scrotum +"Egy hűtlen vejét fülöncsípő, dühös mexikói úr Wesselényinél mázol Quitóban.",Egy hűtlen,Tubal/broad lig anom NOS +Saya lihat foto Hamengkubuwono XV bersama enam zebra purba cantik yang jatuh dari Al Quranmu.,Saya lihat,Ben carcinoid duodenum +"Ma la volpe, col suo balzo, ha raggiunto il quieto Fido.",Ma la volp,Ch leu un cl wo ach rmsn +いろはにほへと ちりぬるを わかよたれそ つねならむ うゐのおくやま けふこえて あさきゆめみし ゑひもせす,いろはにほへと ちり,Mycotic arthritis-pelvis +다람쥐 헌 쳇바퀴에 타고파,다람쥐 헌 쳇바퀴에,Paral polio NEC-type 1 +Sarkanās jūrascūciņas peld pa jūru.,Sarkanās j,Fx larynx/trachea-open +En god stil må først og fremst være klar. Den må være passende. Aristoteles.,En god sti,Dermatophytosis site NOS +Pchnąć w tę łódź jeża lub ośm skrzyń fig,Pchnąć w t,Anxiety disorder oth dis +A rápida raposa castanha salta por cima do cão lento.,A rápida r,Adenoid vegetations +A ligeira raposa marrom ataca o cão preguiçoso.,A ligeira ,Consanguinity +Zebras caolhas de Java querem passar fax para moças gigantes de New York,Zebras cao,"Hypotony NOS, eye" +Agera vulpe maronie sare peste câinele cel leneş.,Agera vulp,Urethral syndrome NOS +Съешь ещё этих мягких французских булок да выпей же чаю,Съешь ещё ,Coccidioidomycosis NOS +Чешће цeђење мрeжастим џаком побољшава фертилизацију генских хибрида.,Чешће цeђе, +Češće ceđenje mrežastim džakom poboljšava fertilizaciju genskih hibrida.,Češće ceđe,Scrn-hemoglobinopath NEC +Kŕdeľ šťastných ďatľov učí pri ústí Váhu mĺkveho koňa obhrýzať kôru a žrať čerstvé mäso.,Kŕdeľ šťas, +V kožuščku hudobnega fanta stopiclja mizar in kliče 0619872345.,V kožuščku, +El veloz murciélago hindú comía feliz cardillo y kiwi. La cigüeña tocaba el saxofón detrás del palenque de paja.,El veloz m,Cervical syndrome NEC +Flygande bäckasiner söka hwila på mjuka tuvor,Flygande b,Letterer-siwe dis abdom +เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะ ๆ จ๋า ๆ น่าฟังเอยฯ,เป็นมนุษย์,Balantidiasis +"Pijamalı hasta, yağız şoföre çabucak güvendi",Pijamalı h,Epilepsy-delivered w p/p +زۆھرەگۈل ئابدۇۋاجىت فرانسىيەنىڭ پارىژدىكى خېلى بىشەم ئوقۇغۇچى.,زۆھرەگۈل ئ,Fit/adj non-vsc cath NEC +ئاۋۇ بىر جۈپ خوراز فرانسىيەنىڭ پارىژ شەھرىگە يېقىن تاغقا كۆچەلمىدى.,ئاۋۇ بىر ج,Sat cerv smr-no trnsfrm diff --git a/caravel/viz.py b/caravel/viz.py index 70504f6f3042d..6cc0523931db7 100644 --- a/caravel/viz.py +++ b/caravel/viz.py @@ -286,7 +286,7 @@ def data(self): def get_csv(self): df = self.get_df() include_index = not isinstance(df.index, pd.RangeIndex) - return df.to_csv(index=include_index) + return df.to_csv(index=include_index, encoding="utf-8") def get_data(self): return [] diff --git a/tests/core_tests.py b/tests/core_tests.py index bace46f4a3e83..362bd56cbbd93 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -104,7 +104,7 @@ def tearDown(self): pass def load_examples(self): - cli.load_examples(sample=True) + cli.load_examples(load_test_data=True) def test_save_slice(self): self.login_admin() @@ -137,6 +137,7 @@ def test_slices(self): urls += [ (slc.slice_name, slc.slice_url), (slc.slice_name, slc.viz.json_endpoint), + (slc.slice_name, slc.viz.csv_endpoint), ] for name, url in urls: print("Slice: " + name)