-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcut.py
44 lines (34 loc) · 872 Bytes
/
cut.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# -*- coding: utf-8 -*-
import jieba
# Read a text file into a list of stripped lines
def ReadTxt(path):
    """Read a UTF-8 text file and return its lines, stripped of whitespace.

    Args:
        path: Path of the text file to read.

    Returns:
        A list with one string per line of the file. Each line is passed
        through ``str.strip()``, so the trailing newline and any other
        leading/trailing whitespace are removed. Blank lines are kept as
        empty strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # The context manager closes the handle even if decoding fails part-way,
    # which a bare open()/close() pair would not.
    with open(path, 'r', encoding='utf-8') as file_in:
        return [line.strip() for line in file_in]
# Write text to a file
def WriteToFile(path, text):
    """Write *text* to *path* as UTF-8, replacing any existing content.

    Args:
        path: Destination file path. The file is created if missing and
            truncated if it already exists (mode ``'w'``).
        text: The string to write; it is written as-is, with no newline
            appended.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # The context manager guarantees the handle is flushed and closed even
    # when write() raises.
    with open(path, 'w', encoding='utf-8') as file_out:
        file_out.write(text)
# Word segmentation
def word_cut(lines):
    """Segment each line with jieba and join its tokens with '/'.

    Args:
        lines: Iterable of strings to segment.

    Returns:
        A list, in input order, holding one string per input line: the
        pieces produced by ``jieba.cut`` for that line, joined by ``'/'``.
    """
    return ['/'.join(jieba.cut(sentence)) for sentence in lines]
def main(path=r'F:\毕业论文\test\tf-idf\20161101-20161130.txt'):
    """Segment every line of a text file and overwrite the file with the result.

    Args:
        path: File to process in place. Defaults to the corpus file this
            script was written for, so calling ``main()`` with no arguments
            behaves as before.

    Raises:
        OSError: If the file cannot be read or rewritten.
    """
    # The whole input is read into memory before the same path is reopened
    # for writing, so truncating it on open does not discard unread data.
    text = ReadTxt(path)
    l_list = word_cut(text)
    # One segmented line per output line; the context manager closes the
    # handle even if a write fails midway.
    with open(path, 'w', encoding='utf-8') as file_out:
        file_out.writelines(line + '\n' for line in l_list)
    print('done')


if __name__ == '__main__':
    main()