forked from sungminoh/Sentences-analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathschema.sql
100 lines (90 loc) · 3.08 KB
/
schema.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
drop table if exists sentence_word_relations;
drop table if exists rule_word_relations;
drop table if exists rules;
drop table if exists rulesets;
drop table if exists words;
drop table if exists sentences;
drop table if exists posts;
drop table if exists topics;
drop table if exists sources;
drop table if exists rule_sentence_relations;
/* [static] topic of an analysis */
CREATE TABLE topics(
_id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(255) CHARACTER SET UTF8MB4
);
/* [static] We will use only few sources. Twitter / Instagram / YouTube / NewsInWeb. */
CREATE TABLE sources(
_id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(255) CHARACTER SET UTF8MB4
);
/* vocab */
CREATE TABLE words(
_id INT PRIMARY KEY AUTO_INCREMENT,
word VARCHAR(128) CHARACTER SET UTF8MB4,
UNIQUE KEY `idx_word` (`word`) USING HASH
);
/* each post in a topic, crawled from one of the sources */
CREATE TABLE posts(
_id INT PRIMARY KEY AUTO_INCREMENT,
topic_id INT,
source_id INT,
title VARCHAR(255) CHARACTER SET UTF8MB4,
url TEXT CHARACTER SET UTF8MB4,
timestamp TIMESTAMP NULL,
FOREIGN KEY (topic_id) REFERENCES topics(_id),
FOREIGN KEY (source_id) REFERENCES sources(_id)
);
/* each sentence in a post. Since sentence is the base unit for grading the rules, we decided to create this table */
CREATE TABLE sentences(
post_id INT,
sentence_seq INT,
full_text TEXT CHARACTER SET UTF8MB4,
PRIMARY KEY (post_id, sentence_seq),
FOREIGN KEY (post_id) REFERENCES posts(_id)
);
/* store the word orders */
CREATE TABLE sentence_word_relations(
post_id INT,
sentence_seq INT,
word_seq INT,
word_id INT,
PRIMARY KEY (post_id, sentence_seq, word_seq),
FOREIGN KEY (post_id, sentence_seq) REFERENCES sentences(post_id, sentence_seq),
FOREIGN KEY (word_id) REFERENCES words(_id)
);
/* [static] each topic has 4~6 rulesets. (most of them has 5 categories) */
CREATE TABLE rulesets(
topic_id INT,
category_seq INT, /* since it would be weird to see '103th ruleset' in screen, I changed this to 'category_number' */
name VARCHAR(255) CHARACTER SET UTF8MB4,
PRIMARY KEY (topic_id, category_seq),
FOREIGN KEY (topic_id) REFERENCES topics(_id)
);
/* each rules in a ruleset */
CREATE TABLE rules(
_id INT PRIMARY KEY AUTO_INCREMENT,
topic_id INT,
category_seq INT,
full_text TEXT CHARACTER SET UTF8MB4,
INDEX `idx_topicid_categoryseq` (`topic_id` ASC, `category_seq` ASC),
FOREIGN KEY (topic_id, category_seq) REFERENCES rulesets(topic_id, category_seq)
);
/* store the word orders */
CREATE TABLE rule_word_relations(
rule_id INT,
word_seq INT,
word_id INT,
PRIMARY KEY (rule_id, word_seq),
FOREIGN KEY (rule_id) REFERENCES rules(_id),
FOREIGN KEY (word_id) REFERENCES words(_id)
);
/* store the analysis resuts */
CREATE TABLE rule_sentence_relations(
rule_id INT.
post_id INT,
sentence_seq INT
PRIMARY KEY (rule_id, post_id, sentence_seq),
FOREIGN KEY (rule_id) REFERENCES rules(_id)
FOREIGN KEY (post_id, sentence_seq) REFERENCES sentences(post_id, sentence_seq)
);