-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinkstat.sh
executable file
·129 lines (102 loc) · 2.7 KB
/
linkstat.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/bin/bash
# Count the links in an N-Triples file that point to other datasets
#author : Nur Aini
#since : January 24, 2012
#Usage : chmod +x linkstat.sh
# ./linkstat.sh ntriplesfile
#######################################
# Print link statistics for an N-Triples file.
#
# Triples whose line contains a literal, a blank-node subject or one of the
# schema-level properties listed in skipproperties are dropped first. The
# remaining triples are reduced to "dataset" keys (host plus path prefix) and
# every triple whose subject and object belong to different datasets is
# counted as an outgoing link. Three reports are written to stdout:
#   1. links per predicate,
#   2. links per target dataset,
#   3. links per (class, predicate, target dataset) together with the number
#      of distinct subjects of that class, the average and a cost summary.
#
# Only POSIX awk features are used, so any awk implementation works.
#
# Arguments: $1 - path to the N-Triples file
# Outputs:   the reports on stdout, diagnostics on stderr
# Returns:   0 on success, 1 if the file cannot be read, 2 on missing argument
#######################################
linkstat() {
  # Lines matching this (case-insensitive) ERE are ignored: literals,
  # blank-node subjects and schema/documentation properties.
  local -r skipproperties='"|http://www.w3.org/2002/07/owl#equivalentclass|http://www.w3.org/2002/07/owl#equivalentProperty|http://www.w3.org/2000/01/rdf-schema#subClassOf|^_|http://www.w3.org/2000/01/rdf-schema#Class|http://www.w3.org/1999/02/22-rdf-syntax-ns#Property|http://xmlns.com/foaf/0.1/page'
  local -r rdf_type='http://www.w3.org/1999/02/22-rdf-syntax-ns#type'

  if [ "$#" -lt 1 ]; then
    printf 'Usage: %s ntriplesfile\n' "${0##*/}" >&2
    return 2
  fi
  if [ ! -r "$1" ] || [ -d "$1" ]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "$1" >&2
    return 1
  fi

  grep -E -v -i -e "$skipproperties" -- "$1" |
    awk -v type_uri="$rdf_type" '
# Cut a dataset key at its last "resource" or "inserts" marker, so that
# "dbpedia.org/resource" becomes "dbpedia.org/". Keys without a marker are
# returned unchanged.
function strip_marker(host,    head) {
  if (match(host, /.*(resource|inserts)/) == 0)
    return host
  head = substr(host, RSTART, RLENGTH)
  sub(/(resource|inserts)$/, "", head)
  return head
}

# Derive the dataset key of a URI.
#   uri       - the term to analyse
#   lead      - literal text in front of the host: "<http://" for subjects
#               (they keep their opening bracket), "http://" for objects
#   need_tail - 1 if the path fallback requires at least one character after
#               the last slash (subjects), 0 otherwise (objects)
# The key is the text between lead and the last "#", else the last ":", else
# the last usable "/"; it is empty when none of these is present. The marker
# stripping is applied in every case.
function dataset_of(uri, lead, need_tail,    n, rest, path_re) {
  n = length(lead)
  path_re = lead ".*/" (need_tail ? ".+$" : ".*")
  if (match(uri, lead ".*#") || match(uri, lead ".*:")) {
    rest = substr(uri, RSTART + n, RLENGTH - n - 1)
  } else if (match(uri, path_re)) {
    rest = substr(uri, RSTART + n)
    if (need_tail)
      rest = substr(rest, 1, length(rest) - 1)
    sub("/[^/]*$", "", rest)
  } else {
    rest = ""
  }
  return strip_marker(rest)
}

# Fields are split between the closing and opening angle brackets, so $1
# keeps its leading "<" and $3 keeps the trailing "> ." terminator.
BEGIN { FS = ">[\t ]<" }

# Anything that does not split into subject, predicate and URI object
# (for example a blank-node object) cannot be a link to another dataset.
NF < 3 { next }

{
  s = $1
  p = $2
  o = $3
  # Remove the statement terminator, whatever whitespace surrounds the dot.
  sub(/>[ \t]*\.[ \t\r]*$/, "", o)

  # rdf:type triples only record the class of the subject.
  if (index(p, type_uri) > 0) {
    SubinClass[s] = o
    next
  }

  # Objects that look like documents or images are not dataset links.
  if ($3 ~ /\.(pdf|html|asp|php|jpg)/)
    next

  subvalue = dataset_of(s, "<http://", 1)
  objvalue = dataset_of(o, "http://", 0)

  # Links inside the same dataset are not counted.
  if (subvalue == objvalue)
    next

  # Predicates that mention page or link after a "#" or "/" are skipped.
  if (match(p, ".*[#/].*([Pp]age|[Ll]ink).*$") != 0)
    next

  arr[p]++
  # drugbank issue: fold every drugbank2 path into one dataset key
  if (match(objvalue, "129.128.185.122/drugbank2/*"))
    objvalue = "129.128.185.122/drugbank2"
  SubLink[s, p, objvalue]++
  otherdataset[objvalue]++
}

END {
  print "Link to other dataset"
  total = 0
  for (pred in arr) {
    print arr[pred], pred
    total += arr[pred]
  }
  print total
  delete arr

  print "--------------"
  print "Other dataset"
  total = 0
  for (target in otherdataset) {
    print otherdataset[target], target
    total += otherdataset[target]
  }
  print total
  delete otherdataset

  print "--------------"
  print "Number of Link each Class"
  print "Class Link OtherDataset TotalLink Distinct Subject Average"

  # Aggregate the per-subject counts by the class of the subject. Looking up
  # SubinClass for an untyped subject creates an empty entry on purpose: such
  # subjects are then counted under the empty class below, which also keeps
  # the divisor of the average non-zero.
  for (key in SubLink) {
    split(key, part, SUBSEP)
    cls = SubinClass[part[1]]
    ClassLink[cls, part[2], part[3]] += SubLink[key]
  }

  # Number of distinct subjects per class.
  for (subject in SubinClass)
    EntitySub[SubinClass[subject]]++

  cost_p = 0
  cost_c = 0
  for (key in ClassLink) {
    split(key, part, SUBSEP)
    cost_p += EntitySub[part[1]]
    cost_c++
    average = ClassLink[key] / EntitySub[part[1]]
    print part[1], part[2], part[3], ClassLink[key], EntitySub[part[1]], average
  }
  print "cost publisher " cost_p
  print "cost consumer " cost_c
  print "total cost " (cost_p + cost_c)
}'
}

linkstat "$@"