Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
wangjiwu committed May 18, 2019
1 parent c0e82cd commit 606ee8a
Show file tree
Hide file tree
Showing 36 changed files with 30,935 additions and 24,888 deletions.
116 changes: 116 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Initially taken from GitHub's Python gitignore file

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a Python script from a template
# before PyInstaller builds the exe, so as to inject date/other info into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
26 changes: 10 additions & 16 deletions data_cut_off.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,19 @@
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

def train_valid_test_split(x_data, y_data,
                           validation_size=0.1, test_size=0.1, shuffle=True):
    """Split paired samples and labels into train, validation and test parts.

    The test portion is held out first. The validation portion is then taken
    from what remains, with ``validation_size`` divided by
    ``1.0 - test_size`` so that (assuming both sizes are given as fractions)
    it is expressed relative to the full input rather than to the remainder.

    Args:
        x_data: samples, passed straight through to ``train_test_split``.
        y_data: labels aligned with ``x_data``.
        validation_size: share of the data used for validation.
        test_size: share of the data used for testing.
        shuffle: forwarded to both ``train_test_split`` calls.

    Returns:
        The tuple ``(x_train, x_valid, x_test, y_train, y_valid, y_test)``.
    """
    # First cut: separate the test portion from everything else.
    x_rest, x_test, y_rest, y_test = train_test_split(
        x_data, y_data, test_size=test_size, shuffle=shuffle)

    # Second cut: the validation share is rescaled because it is now taken
    # from the remainder, not from the full data set.
    rescaled_valid_size = validation_size / (1.0 - test_size)
    x_train, x_valid, y_train, y_valid = train_test_split(
        x_rest, y_rest, test_size=rescaled_valid_size, shuffle=shuffle)

    return x_train, x_valid, x_test, y_train, y_valid, y_test
# def train_valid_test_split(x_data, y_data,
# validation_size=0.1, test_size=0.1, shuffle=True):
# x_, x_test, y_, y_test = train_test_split(x_data, y_data, test_size=test_size, shuffle=shuffle)
# valid_size = validation_size / (1.0 - test_size)
# x_train, x_valid, y_train, y_valid = train_test_split(x_, y_, test_size=valid_size, shuffle=shuffle)
# return x_train, x_valid, x_test, y_train, y_valid, y_test

if __name__ == '__main__':
    # Script entry point: load the labelled data under glue/, shuffle it and
    # write the train/dev/test splits back into the same directory.
    path = "glue/"

    # NOTE(review): the table is read twice and the second read replaces the
    # first; this looks like the old and new version of one statement kept
    # side by side -- confirm which input file is actually intended.
    pd_all = pd.read_csv(os.path.join(path, "data.csv"))
    pd_all = pd.read_csv(os.path.join(path, "train.tsv"), sep='\t')
    pd_all = shuffle(pd_all)
    x_data, y_data = pd_all.text, pd_all.classtype

    x_train, x_valid, x_test, y_train, y_valid, y_test = \
        train_valid_test_split(x_data, y_data, 0.1, 0.1)

    train = pd.DataFrame({'label': y_train, 'x_train': x_train})
    train.to_csv("glue/train.csv", index=False, sep=',')
    valid = pd.DataFrame({'label': y_valid, 'x_valid': x_valid})
    valid.to_csv("glue/dev.csv", index=False, sep=',')
    test = pd.DataFrame({'label': y_test, 'x_test': x_test})
    test.to_csv("glue/test.csv", index=False, sep=',')

    # Take the first tenth of the shuffled rows as the dev set. Floor
    # division keeps the slice bound an integer: a plain "/" produces a
    # float on Python 3, which .iloc rejects as a positional index.
    dev_set = pd_all.iloc[0:pd_all.shape[0] // 10]
    dev_set.to_csv("glue/dev.tsv", index=False, sep='\t')
29 changes: 29 additions & 0 deletions get_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os
import pandas as pd


# Label for each of the first three score columns, in column order.
POLARITY_LABELS = ("neutral", "positive", "negative")


def polarity_from_scores(score_table):
    """Turn a table of per-class scores into a one-column polarity table.

    The first three columns of ``score_table`` are read as the neutral,
    positive and negative scores of each row. Each row is labelled with the
    class holding the highest score; on a tie the earlier class wins
    (neutral before positive before negative).

    Args:
        score_table: DataFrame with at least three score columns.

    Returns:
        A DataFrame with a single ``polarity`` column whose index is the
        input index shifted by one (so a default 0-based index becomes
        1-based).

    Raises:
        ValueError: if ``score_table`` has fewer than three columns.
    """
    class_count = len(POLARITY_LABELS)
    if score_table.shape[1] < class_count:
        raise ValueError(
            "expected at least %d score columns, got %d"
            % (class_count, score_table.shape[1]))

    scores = score_table.iloc[:, :class_count].values
    # argmax reports the first column holding the row maximum, which gives
    # the neutral > positive > negative priority on ties.
    winners = scores.argmax(axis=1)
    labels = [POLARITY_LABELS[column] for column in winners]
    return pd.DataFrame({'polarity': labels}, index=score_table.index + 1)


if __name__ == '__main__':
    # Script entry point: convert the raw test scores into polarity labels.
    path = "tmp/emotion_out/"
    pd_all = pd.read_csv(os.path.join(path, "test_results.tsv"), sep='\t', header=None)
    print(pd_all.shape)

    data = polarity_from_scores(pd_all)

    # NOTE(review): the output file is named .tsv but is written
    # comma-separated; kept as in the original -- confirm the expected format.
    data.to_csv(os.path.join(path, "pre_sample.tsv"), sep=',')
Loading

0 comments on commit 606ee8a

Please sign in to comment.