update

wangjiwu · May 18, 2019 · 606ee8a · 606ee8a
1 parent c0e82cd
commit 606ee8a
Show file tree

Hide file tree

Showing 36 changed files with 30,935 additions and 24,888 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,116 @@
+# Initially taken from GitHub's Python gitignore file
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a Python script from a template
+#  before PyInstaller builds the exe, to inject the date and other info into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/data_cut_off.py b/data_cut_off.py
@@ -3,25 +3,19 @@
 from sklearn.model_selection import train_test_split
 from sklearn.utils import shuffle
 
-def train_valid_test_split(x_data, y_data,
-        validation_size=0.1, test_size=0.1, shuffle=True):
-    x_, x_test, y_, y_test = train_test_split(x_data, y_data, test_size=test_size, shuffle=shuffle)
-    valid_size = validation_size / (1.0 - test_size)
-    x_train, x_valid, y_train, y_valid = train_test_split(x_, y_, test_size=valid_size, shuffle=shuffle)
-    return x_train, x_valid, x_test, y_train, y_valid, y_test
+# def train_valid_test_split(x_data, y_data,
+#         validation_size=0.1, test_size=0.1, shuffle=True):
+#     x_, x_test, y_, y_test = train_test_split(x_data, y_data, test_size=test_size, shuffle=shuffle)
+#     valid_size = validation_size / (1.0 - test_size)
+#     x_train, x_valid, y_train, y_valid = train_test_split(x_, y_, test_size=valid_size, shuffle=shuffle)
+#     return x_train, x_valid, x_test, y_train, y_valid, y_test
 
 if __name__ == '__main__':
     path = "glue/"
-    pd_all = pd.read_csv(os.path.join(path, "data.csv") )
+    pd_all = pd.read_csv(os.path.join(path, "train.tsv"), sep='\t' )
     pd_all = shuffle(pd_all)
-    x_data, y_data = pd_all.text, pd_all.classtype
 
-    x_train, x_valid, x_test, y_train, y_valid, y_test = \
-            train_valid_test_split(x_data, y_data, 0.1, 0.1)
 
-    train = pd.DataFrame({'label':y_train, 'x_train': x_train})
-    train.to_csv("glue/train.csv", index=False, sep=',')
-    valid = pd.DataFrame({'label':y_valid, 'x_valid': x_valid})
-    valid.to_csv("glue/dev.csv", index=False, sep=',')
-    test = pd.DataFrame({'label':y_test, 'x_test': x_test})
-    test.to_csv("glue/test.csv", index=False, sep=',')
+    # Dev set = first tenth of the shuffled rows; "//" keeps the slice bound an int (a float bound makes .iloc raise TypeError).
+    dev_set = pd_all.iloc[0:pd_all.shape[0] // 10]
+    dev_set.to_csv("glue/dev.tsv", index=False, sep='\t')
diff --git a/get_result.py b/get_result.py
@@ -0,0 +1,28 @@
+import os
+import pandas as pd
+
+
+# Class label for each score column of test_results.tsv, in column order.
+POLARITY_LABELS = ("neutral", "positive", "negative")
+
+
+if __name__ == '__main__':
+    path = "tmp/emotion_out/"
+    # One row per test example; the file has no header and one score column per class.
+    pd_all = pd.read_csv(os.path.join(path, "test_results.tsv"), sep='\t', header=None)
+    print(pd_all.shape)
+
+    # Pick the highest-scoring column of every row in one vectorised pass
+    # instead of growing a DataFrame row by row.  argmax returns the first
+    # maximum, so ties still resolve neutral, then positive, then negative.
+    best_column = pd_all.iloc[:, :len(POLARITY_LABELS)].values.argmax(axis=1)
+
+    # Output ids are 1-based, hence the shifted index.
+    data = pd.DataFrame(
+        {'polarity': [POLARITY_LABELS[i] for i in best_column]},
+        index=pd_all.index + 1,
+    )
+
+    # NOTE(review): the file is named .tsv but is written comma-separated;
+    # kept unchanged in case a consumer depends on it -- confirm.
+    data.to_csv(os.path.join(path, "pre_sample.tsv"), sep=',')