diff --git a/bundlelist/ld-sites/pom.xml b/bundlelist/ld-sites/pom.xml new file mode 100644 index 0000000..8c5ce0b --- /dev/null +++ b/bundlelist/ld-sites/pom.xml @@ -0,0 +1,67 @@ + + + + 4.0.0 + + eu.fusepool.p3.stanbol-launcher + stanbol-launcher-reactor + 1.0.0-SNAPSHOT + ../.. + + + stanbol-launcher-ld-sites-bundlelist + feature + + Fusepool Linked Data Site Bundlelist + + Provides Indexes for well known Vocabularies for EntityLinking. Each Site + already comes with a comprehensive default configuration for Entity Linking + that can be used with the Fusepool Literal Extraction Transformer + + + + + + eu.fusepool.p3.stanbol-launcher + stanbol-data-site-iptc + ${project.version} + + + eu.fusepool.p3.stanbol-launcher + stanbol-data-site-stw + ${project.version} + + + eu.fusepool.p3.stanbol-launcher + stanbol-data-site-thesoz + ${project.version} + + + + + + + org.wymiwyg.karaf.tooling + karaf-sling-maven-plugin + + + + + diff --git a/data/site-iptc/pom.xml b/data/site-iptc/pom.xml new file mode 100644 index 0000000..c05e3a2 --- /dev/null +++ b/data/site-iptc/pom.xml @@ -0,0 +1,63 @@ + + + + + 4.0.0 + + + eu.fusepool.p3.stanbol-launcher + stanbol-launcher-reactor + 1.0.0-SNAPSHOT + ../.. 
+ + + stanbol-data-site-iptc + bundle + + IPTC Media Topics + + Provides the IPTC media topics for entity linking + + + 2015 + + + site + config + + + + + + org.apache.felix + maven-bundle-plugin + true + + + ${data.path} + + -100 + + ${config.path} + + + + + + + diff --git a/data/site-iptc/src/main/resources/config/iptc.solrindex.ref b/data/site-iptc/src/main/resources/config/iptc.solrindex.ref new file mode 100644 index 0000000..a77cbfb --- /dev/null +++ b/data/site-iptc/src/main/resources/config/iptc.solrindex.ref @@ -0,0 +1,5 @@ +#Mon Jun 29 12:21:02 CEST 2015 +Name=SolrIndex for iptc +Synchronized=true +Description=IPTC Media Topics +Index-Archive=iptc.solrindex.zip diff --git a/data/site-iptc/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-iptc_linking.config b/data/site-iptc/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-iptc_linking.config new file mode 100644 index 0000000..e9d2480 --- /dev/null +++ b/data/site-iptc/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-iptc_linking.config @@ -0,0 +1,3 @@ +stanbol.enhancer.chain.weighted.chain=["langdetect","opennlp-sentence","opennlp-token","opennlp-pos","iptc-linking","text-annotation-new-model","fise2fam"] +stanbol.enhancer.chain.chainproperties=[""] +stanbol.enhancer.chain.name="iptc-linking" diff --git a/data/site-iptc/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-iptc_plain_linking.config b/data/site-iptc/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-iptc_plain_linking.config new file mode 100644 index 0000000..71d520c --- /dev/null +++ b/data/site-iptc/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-iptc_plain_linking.config @@ -0,0 +1,3 @@ 
+stanbol.enhancer.chain.weighted.chain=["langdetect","opennlp-sentence","opennlp-token","iptc-plain-linking","text-annotation-new-model","fise2fam"] +stanbol.enhancer.chain.chainproperties=[""] +stanbol.enhancer.chain.name="iptc-plain-linking" diff --git a/data/site-iptc/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent-iptc_linking.config b/data/site-iptc/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent-iptc_linking.config new file mode 100644 index 0000000..69d075d --- /dev/null +++ b/data/site-iptc/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent-iptc_linking.config @@ -0,0 +1,17 @@ +stanbol.enhancer.engine.name="iptc-linking" +enhancer.engines.linking.includeSimilarScore=B"true" +enhancer.engines.linking.lucenefst.fstThreadPoolSize=I"1" +enhancer.engines.linking.lucenefst.rankingField="entityhub:entityRank" +enhancer.engines.linking.caseSensitive=B"false" +enhancer.engines.linking.lucenefst.fieldEncoding="SolrYard" +enhancer.engines.linking.entityTypes=[""] +enhancer.engines.linking.suggestions=I"3" +enhancer.engines.linking.defaultMatchingLanguage="" +enhancer.engines.linking.lucenefst.entityCacheSize=I"65536" +enhancer.engines.linking.lucenefst.fstfolder="${solr-data-dir}/fst" +enhancer.engines.linking.lucenefst.fstconfig=["*;field\=rdfs:label;generate\=false"] +enhancer.engines.linking.lucenefst.solrcore="iptc" +enhancer.engines.linking.lucenefst.typeField="rdf:type" +enhancer.engines.linking.typeMappings=["skos:Concept"] +enhancer.engines.linking.processedLanguages=["*;lmmtip;uc\=LINK;prob\=0.75;pprob\=0.75","de;uc\=MATCH","es;lc\=Noun","nl;lc\=Noun"] +enhancer.engines.linking.properNounsState=B"false" diff --git a/data/site-iptc/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet-iptc_linking.config 
b/data/site-iptc/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet-iptc_linking.config new file mode 100644 index 0000000..81dcefd --- /dev/null +++ b/data/site-iptc/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet-iptc_linking.config @@ -0,0 +1,15 @@ +enhancer.engines.linking.includeSimilarScore=B"true" +enhancer.engines.linking.lucenefst.fstThreadPoolSize=I"1" +enhancer.engines.linking.lucenefst.rankingField="entityhub:entityRank" +enhancer.engines.linking.caseSensitive=B"false" +enhancer.engines.linking.lucenefst.fieldEncoding="SolrYard" +enhancer.engines.linking.entityTypes=[""] +enhancer.engines.linking.suggestions=I"3" +enhancer.engines.linking.defaultMatchingLanguage="" +stanbol.enhancer.engine.name="iptc-plain-linking" +enhancer.engines.linking.lucenefst.entityCacheSize=I"65536" +enhancer.engines.linking.lucenefst.fstfolder="${solr-data-dir}/fst" +enhancer.engines.linking.lucenefst.fstconfig=["*;field\=rdfs:label;generate\=false"] +enhancer.engines.linking.lucenefst.solrcore="iptc" +enhancer.engines.linking.lucenefst.typeField="rdf:type" +enhancer.engines.linking.typeMappings=["skos:Concept"] diff --git a/data/site-iptc/src/main/resources/config/org.apache.stanbol.entityhub.core.site.CacheImpl-iptc.config b/data/site-iptc/src/main/resources/config/org.apache.stanbol.entityhub.core.site.CacheImpl-iptc.config new file mode 100644 index 0000000..8611696 --- /dev/null +++ b/data/site-iptc/src/main/resources/config/org.apache.stanbol.entityhub.core.site.CacheImpl-iptc.config @@ -0,0 +1,4 @@ +org.apache.stanbol.entityhub.yard.name="iptc\ Cache" +org.apache.stanbol.entityhub.yard.cacheYardId="iptcIndex" +org.apache.stanbol.entityhub.yard.id="iptcIndex" +org.apache.stanbol.entityhub.yard.description="Cache\ for\ the\ iptc\ Referenced\ Site\ using\ the\ iptcIndex." 
diff --git a/data/site-iptc/src/main/resources/config/org.apache.stanbol.entityhub.site.referencedSite-iptc.config b/data/site-iptc/src/main/resources/config/org.apache.stanbol.entityhub.site.referencedSite-iptc.config new file mode 100644 index 0000000..93295d7 --- /dev/null +++ b/data/site-iptc/src/main/resources/config/org.apache.stanbol.entityhub.site.referencedSite-iptc.config @@ -0,0 +1,14 @@ +org.apache.stanbol.entityhub.site.licenseName=["Creative\ Commons\ Attribution\ (CC\ BY)\ 4.0\ license"] +org.apache.stanbol.entityhub.site.defaultExpireDuration=I"0" +org.apache.stanbol.entityhub.site.licenseUrl=["http://creativecommons.org/licenses/by/4.0/"] +org.apache.stanbol.entityhub.site.attributionUrl="https://iptc.org/about-iptc/" +org.apache.stanbol.entityhub.site.cacheId="iptcIndex" +org.apache.stanbol.entityhub.site.defaultSymbolState="proposed" +org.apache.stanbol.entityhub.site.name="iptc" +org.apache.stanbol.entityhub.site.entityPrefix=["http://cv.iptc.org/newscodes/"] +org.apache.stanbol.entityhub.site.id="iptc" +org.apache.stanbol.entityhub.site.description="IPTC\ Media\ Topics" +org.apache.stanbol.entityhub.site.attribution="IPTC\ International\ Press\ \u0003Telecommunications\ Council" +org.apache.stanbol.entityhub.site.defaultMappedEntityState="proposed" +org.apache.stanbol.entityhub.site.fieldMappings=("#\ Licensed\ to\ the\ Apache\ Software\ Foundation\ (ASF)\ under\ one\ or\ more","#\ contributor\ license\ agreements.\ \ See\ the\ NOTICE\ file\ distributed\ with","#\ this\ work\ for\ additional\ information\ regarding\ copyright\ ownership.","#\ The\ ASF\ licenses\ this\ file\ to\ You\ under\ the\ Apache\ License,\ Version\ 2.0","#\ (the\ \"License\");\ you\ may\ not\ use\ this\ file\ except\ in\ compliance\ with","#\ the\ License.\ \ You\ may\ obtain\ a\ copy\ of\ the\ License\ at","#","#\ \ \ \ \ http://www.apache.org/licenses/LICENSE-2.0","#","#\ Unless\ required\ by\ applicable\ law\ or\ agreed\ to\ in\ writing,\ software","#\ distributed\ 
under\ the\ License\ is\ distributed\ on\ an\ \"AS\ IS\"\ BASIS,","#\ WITHOUT\ WARRANTIES\ OR\ CONDITIONS\ OF\ ANY\ KIND,\ either\ express\ or\ implied.","#\ See\ the\ License\ for\ the\ specific\ language\ governing\ permissions\ and","#\ limitations\ under\ the\ License.","#","#NOTE:\ THIS\ IS\ A\ DEFAULT\ MAPPING\ SPECIFICATION\ THAT\ INCLUDES\ MAPPINGS\ FOR","#\ \ \ \ \ \ COMMON\ ONTOLOGIES.\ USERS\ MIGHT\ WANT\ TO\ ADAPT\ THIS\ CONFIGURATION\ BY","#\ \ \ \ \ \ COMMENTING/UNCOMMENTING\ AND/OR\ ADDING\ NEW\ MAPPINGS","","#\ ---\ Define\ the\ Languages\ for\ all\ fields\ ---","#\ to\ restrict\ languages\ to\ be\ imported\ (for\ all\ fields)","#|\ @\=null;en;de;fr;it","","#NOTE:\ null\ is\ used\ to\ import\ labels\ with\ no\ specified\ language","","#\ ---\ Define\ the\ Languages\ for\ all\ fields\ ---","#\ Uncomment\ to\ restrict\ indexing\ to\ a\ specific\ list\ of\ languages,\ otherwise\ all","#\ languages\ are\ indexed","#|\ @\=null;en;de;fr;it","","#\ ---\ RDF\ RDFS\ and\ OWL\ Mappings\ ---","#\ This\ configuration\ only\ index\ properties\ that\ are\ typically\ used\ to\ store","#\ instance\ data\ defined\ by\ such\ namespaces.\ This\ excludes\ ontology\ definitions","","#\ NOTE\ that\ nearly\ all\ other\ ontologies\ are\ are\ using\ properties\ of\ these\ three","#\ \ \ \ \ \ schemas,\ therefore\ it\ is\ strongly\ recommended\ to\ include\ such\ information!","","rdf:type\ |\ d\=entityhub:ref","","rdfs:label\ ","rdfs:comment","rdfs:seeAlso\ |\ d\=entityhub:ref","","","owl:sameAs\ |\ d\=entityhub:ref","","#If\ one\ likes\ to\ also\ index\ ontologies\ one\ should\ add\ the\ following\ statements","#owl:*","#rdfs:*","","#\ ---\ Dublin\ Core\ (DC)\ ---","#\ The\ default\ configuration\ imports\ all\ dc-terms\ data\ and\ copies\ values\ for\ the","#\ old\ dc-elements\ standard\ over\ to\ the\ according\ properties\ of\ the\ dc-terms","#\ standard.","","#\ NOTE\ that\ a\ lot\ of\ other\ ontologies\ are\ also\ using\ DC\ for\ some\ of\ there\ data","#\ \ \ \ \ \ 
therefore\ it\ is\ strongly\ recommended\ to\ include\ such\ information!","","#mapping\ for\ all\ dc-terms\ properties","dc:*","","#\ copy\ dc:title\ to\ rdfs:label","dc:title\ >\ rdfs:label","","#\ deactivated\ by\ default,\ because\ such\ mappings\ are\ mapped\ to\ dc-terms","#dc-elements:*","","#\ mappings\ for\ the\ dc-elements\ properties\ to\ the\ dc-terms","dc-elements:contributor\ >\ dc:contributor","dc-elements:coverage\ >\ dc:coverage","dc-elements:creator\ >\ dc:creator","dc-elements:date\ >\ dc:date","dc-elements:description\ >\ dc:description","dc-elements:format\ >\ dc:format","dc-elements:identifier\ >\ dc:identifier","dc-elements:language\ >\ dc:language","dc-elements:publisher\ >\ dc:publisher","dc-elements:relation\ >\ dc:relation","dc-elements:rights\ >\ dc:rights","dc-elements:source\ >\ dc:source","dc-elements:subject\ >\ dc:subject","dc-elements:title\ >\ dc:title","dc-elements:type\ >\ dc:type","#also\ use\ dc-elements:title\ as\ label","dc-elements:title\ >\ rdfs:label","","#\ ---\ Social\ Networks\ (via\ foaf)\ ---","#The\ Friend\ of\ a\ Friend\ schema\ is\ often\ used\ to\ describe\ social\ relations\ between\ people","foaf:*","","#\ copy\ the\ name\ of\ a\ person\ over\ to\ rdfs:label","foaf:name\ >\ rdfs:label","","#\ additional\ data\ types\ checks","foaf:knows\ |\ d\=entityhub:ref","foaf:made\ |\ d\=entityhub:ref","foaf:maker\ |\ d\=entityhub:ref","foaf:member\ |\ d\=entityhub:ref","foaf:homepage\ |\ d\=xsd:anyURI","foaf:depiction\ |\ d\=xsd:anyURI","foaf:img\ |\ d\=xsd:anyURI","foaf:logo\ |\ d\=xsd:anyURI","#page\ about\ the\ entity","foaf:page\ |\ d\=xsd:anyURI","","","#\ ---\ Schema.org\ --","","#\ Defines\ an\ Ontology\ used\ by\ search\ engines\ (Google,\ Yahoo\ and\ Bing)\ for\ ","#\ indexing\ websites.","","schema:*","#\ Copy\ all\ names\ of\ schema\ instances\ over\ to\ rdfs:label","schema:name\ >\ rdfs:label","","#\ ---\ Simple\ Knowledge\ Organization\ System\ (SKOS)\ ---","","#\ A\ common\ data\ model\ for\ sharing\ and\ 
linking\ knowledge\ organization\ systems\ ","#\ via\ the\ Semantic\ Web.\ Typically\ used\ to\ encode\ controlled\ vocabularies\ as","#\ a\ thesaurus\ \ ","skos:*","","#\ copy\ all\ SKOS\ labels\ (preferred,\ alternative\ and\ hidden)\ over\ to\ rdfs:label","skos:prefLabel\ >\ rdfs:label","skos:altLabel\ >\ rdfs:label","skos:hiddenLabel\ >\ rdfs:label","","#\ copy\ values\ of\ **Match\ relations\ to\ the\ according\ related,\ broader\ and\ narrower","skos:relatedMatch\ >\ skos:related","skos:broadMatch\ >\ skos:broader","skos:narrowMatch\ >\ skos:skos:narrower","","#similar\ mappings\ for\ transitive\ variants\ are\ not\ contained,\ because\ transitive","#reasoning\ is\ not\ directly\ supported\ by\ the\ Entityhub.","","#\ Some\ SKOS\ thesaurus\ do\ use\ \"skos:transitiveBroader\"\ and\ \"skos:transitiveNarrower\"","#\ however\ such\ properties\ are\ only\ intended\ to\ be\ used\ by\ reasoners\ to","#\ calculate\ transitive\ closures\ over\ broader/narrower\ hierarchies.","#\ see\ http://www.w3.org/TR/skos-reference/#L2413\ for\ details","#\ to\ correct\ such\ cases\ we\ will\ copy\ transitive\ relations\ to\ their\ counterpart","skos:narrowerTransitive\ >\ skos:narrower","skos:broaderTransitive\ >\ skos:broader","","","#\ ---\ Semantically-Interlinked\ Online\ Communities\ (SIOC)\ ---","","#\ An\ ontology\ for\ describing\ the\ information\ in\ online\ communities.\ ","#\ This\ information\ can\ be\ used\ to\ export\ information\ from\ online\ communities\ ","#\ and\ to\ link\ them\ together.\ The\ scope\ of\ the\ application\ areas\ that\ SIOC\ can\ ","#\ be\ used\ for\ includes\ (and\ is\ not\ limited\ to)\ weblogs,\ message\ boards,\ ","#\ mailing\ lists\ and\ chat\ channels.","sioc:*","","#\ ---\ biographical\ information\ (bio)","#\ A\ vocabulary\ for\ describing\ biographical\ information\ about\ people,\ both\ living","#\ and\ dead.\ (see\ http://vocab.org/bio/0.1/)","bio:*","","#\ ---\ Rich\ Site\ Summary\ (rss)\ ---","rss:*","","#\ ---\ GoodRelations\ 
(gr)\ ---","#\ GoodRelations\ is\ a\ standardised\ vocabulary\ for\ product,\ price,\ and\ company\ data","gr:*","","#\ ---\ Creative\ Commons\ Rights\ Expression\ Language\ (cc)","#\ The\ Creative\ Commons\ Rights\ Expression\ Language\ (CC\ REL)\ lets\ you\ describe\ ","#\ copyright\ licenses\ in\ RDF.","cc:*","","","","","","") +org.apache.stanbol.entityhub.site.cacheStrategy="all" diff --git a/data/site-iptc/src/main/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-iptc.config b/data/site-iptc/src/main/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-iptc.config new file mode 100644 index 0000000..20372a3 --- /dev/null +++ b/data/site-iptc/src/main/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-iptc.config @@ -0,0 +1,7 @@ +org.apache.stanbol.entityhub.yard.solr.solrUri="iptc" +org.apache.stanbol.entityhub.yard.name="iptc\ Index" +org.apache.stanbol.entityhub.yard.solr.multiYardIndexLayout=B"false" +org.apache.stanbol.entityhub.yard.solr.useDefaultConfig=B"false" +org.apache.stanbol.entityhub.yard.id="iptcIndex" +http://stanbol.apache.org/ontology/entityhub/entityhub#entityRank="http://stanbol.apache.org/ontology/entityhub/entityhub#entityRank" +org.apache.stanbol.entityhub.yard.description="Full\ local\ index\ for\ the\ Referenced\ Site\ \"iptc\"." diff --git a/data/site-iptc/src/main/resources/site/iptc.solrindex.zip b/data/site-iptc/src/main/resources/site/iptc.solrindex.zip new file mode 100644 index 0000000..10936e3 Binary files /dev/null and b/data/site-iptc/src/main/resources/site/iptc.solrindex.zip differ diff --git a/data/site-stw/pom.xml b/data/site-stw/pom.xml new file mode 100644 index 0000000..c70483d --- /dev/null +++ b/data/site-stw/pom.xml @@ -0,0 +1,63 @@ + + + + + 4.0.0 + + + eu.fusepool.p3.stanbol-launcher + stanbol-launcher-reactor + 1.0.0-SNAPSHOT + ../.. 
+ + + stanbol-data-site-stw + bundle + + STW - Standard-Thesaurus Wirtschaft + + Provides the STW for entity linking + + + 2015 + + + site + config + + + + + + org.apache.felix + maven-bundle-plugin + true + + + ${data.path} + + -100 + + ${config.path} + + + + + + + diff --git a/data/site-stw/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-stw_linking.config b/data/site-stw/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-stw_linking.config new file mode 100644 index 0000000..0507650 --- /dev/null +++ b/data/site-stw/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-stw_linking.config @@ -0,0 +1,3 @@ +stanbol.enhancer.chain.weighted.chain=["langdetect","opennlp-sentence","opennlp-token","opennlp-pos","stw-linking","text-annotation-new-model","fise2fam"] +stanbol.enhancer.chain.chainproperties=[""] +stanbol.enhancer.chain.name="stw-linking" diff --git a/data/site-stw/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-stw_plain_linking.config b/data/site-stw/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-stw_plain_linking.config new file mode 100644 index 0000000..37a96f1 --- /dev/null +++ b/data/site-stw/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-stw_plain_linking.config @@ -0,0 +1,3 @@ +stanbol.enhancer.chain.weighted.chain=["langdetect","opennlp-sentence","opennlp-token","stw-plain-linking","text-annotation-new-model","fise2fam"] +stanbol.enhancer.chain.chainproperties=[""] +stanbol.enhancer.chain.name="stw-plain-linking" diff --git a/data/site-stw/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent-stw_linking.config b/data/site-stw/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent-stw_linking.config new file mode 100644 
index 0000000..de46e5c --- /dev/null +++ b/data/site-stw/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent-stw_linking.config @@ -0,0 +1,17 @@ +enhancer.engines.linking.includeSimilarScore=B"true" +enhancer.engines.linking.lucenefst.fstThreadPoolSize=I"1" +enhancer.engines.linking.lucenefst.rankingField="entityhub:entityRank" +enhancer.engines.linking.caseSensitive=B"false" +enhancer.engines.linking.lucenefst.fieldEncoding="SolrYard" +enhancer.engines.linking.entityTypes=[""] +enhancer.engines.linking.suggestions=I"3" +enhancer.engines.linking.defaultMatchingLanguage="" +stanbol.enhancer.engine.name="stw-linking" +enhancer.engines.linking.lucenefst.entityCacheSize=I"65536" +enhancer.engines.linking.lucenefst.fstfolder="${solr-data-dir}/fst" +enhancer.engines.linking.lucenefst.fstconfig=["*;field\=rdfs:label;generate\=false"] +enhancer.engines.linking.lucenefst.solrcore="stw" +enhancer.engines.linking.lucenefst.typeField="rdf:type" +enhancer.engines.linking.typeMappings=["skos:Concept","http://zbw.eu/namespaces/zbw-extensions/Thsys","http://zbw.eu/namespaces/zbw-extensions/Descriptor"] +enhancer.engines.linking.processedLanguages=["*;lmmtip;uc\=LINK;prob\=0.75;pprob\=0.75","de;uc\=MATCH","es;lc\=Noun","nl;lc\=Noun"] +enhancer.engines.linking.properNounsState=B"false" diff --git a/data/site-stw/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet-stw_linking.config b/data/site-stw/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet-stw_linking.config new file mode 100644 index 0000000..982da2e --- /dev/null +++ b/data/site-stw/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet-stw_linking.config @@ -0,0 +1,15 @@ +enhancer.engines.linking.includeSimilarScore=B"true" +enhancer.engines.linking.lucenefst.fstThreadPoolSize=I"1" 
+enhancer.engines.linking.lucenefst.rankingField="entityhub:entityRank" +enhancer.engines.linking.caseSensitive=B"false" +enhancer.engines.linking.lucenefst.fieldEncoding="SolrYard" +enhancer.engines.linking.entityTypes=[""] +enhancer.engines.linking.suggestions=I"3" +enhancer.engines.linking.defaultMatchingLanguage="" +stanbol.enhancer.engine.name="stw-plain-linking" +enhancer.engines.linking.lucenefst.entityCacheSize=I"65536" +enhancer.engines.linking.lucenefst.fstfolder="${solr-data-dir}/fst" +enhancer.engines.linking.lucenefst.fstconfig=["*;field\=rdfs:label;generate\=false"] +enhancer.engines.linking.lucenefst.solrcore="stw" +enhancer.engines.linking.lucenefst.typeField="rdf:type" +enhancer.engines.linking.typeMappings=["skos:Concept","http://zbw.eu/namespaces/zbw-extensions/Thsys","http://zbw.eu/namespaces/zbw-extensions/Descriptor"] diff --git a/data/site-stw/src/main/resources/config/org.apache.stanbol.entityhub.core.site.CacheImpl-stw.config b/data/site-stw/src/main/resources/config/org.apache.stanbol.entityhub.core.site.CacheImpl-stw.config new file mode 100755 index 0000000..e0fd249 --- /dev/null +++ b/data/site-stw/src/main/resources/config/org.apache.stanbol.entityhub.core.site.CacheImpl-stw.config @@ -0,0 +1,4 @@ +org.apache.stanbol.entityhub.yard.name="stw\ Cache" +org.apache.stanbol.entityhub.yard.cacheYardId="stwIndex" +org.apache.stanbol.entityhub.yard.id="stwIndex" +org.apache.stanbol.entityhub.yard.description="Cache\ for\ the\ stw\ Referenced\ Site\ using\ the\ stwIndex." 
diff --git a/data/site-stw/src/main/resources/config/org.apache.stanbol.entityhub.site.referencedSite-stw.config b/data/site-stw/src/main/resources/config/org.apache.stanbol.entityhub.site.referencedSite-stw.config new file mode 100755 index 0000000..8c00dfd --- /dev/null +++ b/data/site-stw/src/main/resources/config/org.apache.stanbol.entityhub.site.referencedSite-stw.config @@ -0,0 +1,14 @@ +org.apache.stanbol.entityhub.site.licenseName=["Open\ Database\ License\ (ODbL)\ 1.0."] +org.apache.stanbol.entityhub.site.defaultExpireDuration=I"0" +org.apache.stanbol.entityhub.site.licenseUrl=["http://opendatacommons.org/licenses/odbl/1-0/"] +org.apache.stanbol.entityhub.site.attributionUrl="http://zbw.eu/stw/versions/latest/about.en.html" +org.apache.stanbol.entityhub.site.cacheId="stwIndex" +org.apache.stanbol.entityhub.site.defaultSymbolState="proposed" +org.apache.stanbol.entityhub.site.name="stw" +org.apache.stanbol.entityhub.site.entityPrefix=["http://zbw.eu/stw/"] +org.apache.stanbol.entityhub.site.id="stw" +org.apache.stanbol.entityhub.site.description="Standard-Thesaurus\ Wirtschaft" +org.apache.stanbol.entityhub.site.attribution="STW\ Thesaurus\ for\ Economics" +org.apache.stanbol.entityhub.site.defaultMappedEntityState="proposed" +org.apache.stanbol.entityhub.site.fieldMappings=("#\ Licensed\ to\ the\ Apache\ Software\ Foundation\ (ASF)\ under\ one\ or\ more","#\ contributor\ license\ agreements.\ \ See\ the\ NOTICE\ file\ distributed\ with","#\ this\ work\ for\ additional\ information\ regarding\ copyright\ ownership.","#\ The\ ASF\ licenses\ this\ file\ to\ You\ under\ the\ Apache\ License,\ Version\ 2.0","#\ (the\ \"License\");\ you\ may\ not\ use\ this\ file\ except\ in\ compliance\ with","#\ the\ License.\ \ You\ may\ obtain\ a\ copy\ of\ the\ License\ at","#","#\ \ \ \ \ http://www.apache.org/licenses/LICENSE-2.0","#","#\ Unless\ required\ by\ applicable\ law\ or\ agreed\ to\ in\ writing,\ software","#\ distributed\ under\ the\ License\ is\ distributed\ 
on\ an\ \"AS\ IS\"\ BASIS,","#\ WITHOUT\ WARRANTIES\ OR\ CONDITIONS\ OF\ ANY\ KIND,\ either\ express\ or\ implied.","#\ See\ the\ License\ for\ the\ specific\ language\ governing\ permissions\ and","#\ limitations\ under\ the\ License.","#","#NOTE:\ THIS\ IS\ A\ DEFAULT\ MAPPING\ SPECIFICATION\ THAT\ INCLUDES\ MAPPINGS\ FOR","#\ \ \ \ \ \ COMMON\ ONTOLOGIES.\ USERS\ MIGHT\ WANT\ TO\ ADAPT\ THIS\ CONFIGURATION\ BY","#\ \ \ \ \ \ COMMENTING/UNCOMMENTING\ AND/OR\ ADDING\ NEW\ MAPPINGS","","#\ ---\ Define\ the\ Languages\ for\ all\ fields\ ---","#\ to\ restrict\ languages\ to\ be\ imported\ (for\ all\ fields)","#|\ @\=null;en;de;fr;it","","#NOTE:\ null\ is\ used\ to\ import\ labels\ with\ no\ specified\ language","","#\ ---\ Define\ the\ Languages\ for\ all\ fields\ ---","#\ Uncomment\ to\ restrict\ indexing\ to\ a\ specific\ list\ of\ languages,\ otherwise\ all","#\ languages\ are\ indexed","#|\ @\=null;en;de;fr;it","","#\ ---\ RDF\ RDFS\ and\ OWL\ Mappings\ ---","#\ This\ configuration\ only\ index\ properties\ that\ are\ typically\ used\ to\ store","#\ instance\ data\ defined\ by\ such\ namespaces.\ This\ excludes\ ontology\ definitions","","#\ NOTE\ that\ nearly\ all\ other\ ontologies\ are\ are\ using\ properties\ of\ these\ three","#\ \ \ \ \ \ schemas,\ therefore\ it\ is\ strongly\ recommended\ to\ include\ such\ information!","","rdf:type\ |\ d\=entityhub:ref","","rdfs:label\ ","rdfs:comment","rdfs:seeAlso\ |\ d\=entityhub:ref","","","owl:sameAs\ |\ d\=entityhub:ref","","#If\ one\ likes\ to\ also\ index\ ontologies\ one\ should\ add\ the\ following\ statements","#owl:*","#rdfs:*","","#\ ---\ Dublin\ Core\ (DC)\ ---","#\ The\ default\ configuration\ imports\ all\ dc-terms\ data\ and\ copies\ values\ for\ the","#\ old\ dc-elements\ standard\ over\ to\ the\ according\ properties\ of\ the\ dc-terms","#\ standard.","","#\ NOTE\ that\ a\ lot\ of\ other\ ontologies\ are\ also\ using\ DC\ for\ some\ of\ there\ data","#\ \ \ \ \ \ therefore\ it\ is\ strongly\ 
recommended\ to\ include\ such\ information!","","#mapping\ for\ all\ dc-terms\ properties","dc:*","","#\ copy\ dc:title\ to\ rdfs:label","dc:title\ >\ rdfs:label","","#\ deactivated\ by\ default,\ because\ such\ mappings\ are\ mapped\ to\ dc-terms","#dc-elements:*","","#\ mappings\ for\ the\ dc-elements\ properties\ to\ the\ dc-terms","dc-elements:contributor\ >\ dc:contributor","dc-elements:coverage\ >\ dc:coverage","dc-elements:creator\ >\ dc:creator","dc-elements:date\ >\ dc:date","dc-elements:description\ >\ dc:description","dc-elements:format\ >\ dc:format","dc-elements:identifier\ >\ dc:identifier","dc-elements:language\ >\ dc:language","dc-elements:publisher\ >\ dc:publisher","dc-elements:relation\ >\ dc:relation","dc-elements:rights\ >\ dc:rights","dc-elements:source\ >\ dc:source","dc-elements:subject\ >\ dc:subject","dc-elements:title\ >\ dc:title","dc-elements:type\ >\ dc:type","#also\ use\ dc-elements:title\ as\ label","dc-elements:title\ >\ rdfs:label","","#\ ---\ Social\ Networks\ (via\ foaf)\ ---","#The\ Friend\ of\ a\ Friend\ schema\ is\ often\ used\ to\ describe\ social\ relations\ between\ people","foaf:*","","#\ copy\ the\ name\ of\ a\ person\ over\ to\ rdfs:label","foaf:name\ >\ rdfs:label","","#\ additional\ data\ types\ checks","foaf:knows\ |\ d\=entityhub:ref","foaf:made\ |\ d\=entityhub:ref","foaf:maker\ |\ d\=entityhub:ref","foaf:member\ |\ d\=entityhub:ref","foaf:homepage\ |\ d\=xsd:anyURI","foaf:depiction\ |\ d\=xsd:anyURI","foaf:img\ |\ d\=xsd:anyURI","foaf:logo\ |\ d\=xsd:anyURI","#page\ about\ the\ entity","foaf:page\ |\ d\=xsd:anyURI","","","#\ ---\ Schema.org\ --","","#\ Defines\ an\ Ontology\ used\ by\ search\ engines\ (Google,\ Yahoo\ and\ Bing)\ for\ ","#\ indexing\ websites.","","schema:*","#\ Copy\ all\ names\ of\ schema\ instances\ over\ to\ rdfs:label","schema:name\ >\ rdfs:label","","#\ ---\ Simple\ Knowledge\ Organization\ System\ (SKOS)\ ---","","#\ A\ common\ data\ model\ for\ sharing\ and\ linking\ knowledge\ 
organization\ systems\ ","#\ via\ the\ Semantic\ Web.\ Typically\ used\ to\ encode\ controlled\ vocabularies\ as","#\ a\ thesaurus\ \ ","skos:*","","#\ copy\ all\ SKOS\ labels\ (preferred,\ alternative\ and\ hidden)\ over\ to\ rdfs:label","skos:prefLabel\ >\ rdfs:label","skos:altLabel\ >\ rdfs:label","skos:hiddenLabel\ >\ rdfs:label","","#\ copy\ values\ of\ **Match\ relations\ to\ the\ according\ related,\ broader\ and\ narrower","skos:relatedMatch\ >\ skos:related","skos:broadMatch\ >\ skos:broader","skos:narrowMatch\ >\ skos:skos:narrower","","#similar\ mappings\ for\ transitive\ variants\ are\ not\ contained,\ because\ transitive","#reasoning\ is\ not\ directly\ supported\ by\ the\ Entityhub.","","#\ Some\ SKOS\ thesaurus\ do\ use\ \"skos:transitiveBroader\"\ and\ \"skos:transitiveNarrower\"","#\ however\ such\ properties\ are\ only\ intended\ to\ be\ used\ by\ reasoners\ to","#\ calculate\ transitive\ closures\ over\ broader/narrower\ hierarchies.","#\ see\ http://www.w3.org/TR/skos-reference/#L2413\ for\ details","#\ to\ correct\ such\ cases\ we\ will\ copy\ transitive\ relations\ to\ their\ counterpart","skos:narrowerTransitive\ >\ skos:narrower","skos:broaderTransitive\ >\ skos:broader","","","#\ ---\ Semantically-Interlinked\ Online\ Communities\ (SIOC)\ ---","","#\ An\ ontology\ for\ describing\ the\ information\ in\ online\ communities.\ ","#\ This\ information\ can\ be\ used\ to\ export\ information\ from\ online\ communities\ ","#\ and\ to\ link\ them\ together.\ The\ scope\ of\ the\ application\ areas\ that\ SIOC\ can\ ","#\ be\ used\ for\ includes\ (and\ is\ not\ limited\ to)\ weblogs,\ message\ boards,\ ","#\ mailing\ lists\ and\ chat\ channels.","sioc:*","","#\ ---\ biographical\ information\ (bio)","#\ A\ vocabulary\ for\ describing\ biographical\ information\ about\ people,\ both\ living","#\ and\ dead.\ (see\ http://vocab.org/bio/0.1/)","bio:*","","#\ ---\ Rich\ Site\ Summary\ (rss)\ ---","rss:*","","#\ ---\ GoodRelations\ (gr)\ ---","#\ 
GoodRelations\ is\ a\ standardised\ vocabulary\ for\ product,\ price,\ and\ company\ data","gr:*","","#\ ---\ Creative\ Commons\ Rights\ Expression\ Language\ (cc)","#\ The\ Creative\ Commons\ Rights\ Expression\ Language\ (CC\ REL)\ lets\ you\ describe\ ","#\ copyright\ licenses\ in\ RDF.","cc:*","","","","","","") +org.apache.stanbol.entityhub.site.cacheStrategy="all" diff --git a/data/site-stw/src/main/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-stw.config b/data/site-stw/src/main/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-stw.config new file mode 100755 index 0000000..53163fe --- /dev/null +++ b/data/site-stw/src/main/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-stw.config @@ -0,0 +1,7 @@ +org.apache.stanbol.entityhub.yard.solr.solrUri="stw" +org.apache.stanbol.entityhub.yard.name="stw\ Index" +org.apache.stanbol.entityhub.yard.solr.multiYardIndexLayout=B"false" +org.apache.stanbol.entityhub.yard.solr.useDefaultConfig=B"false" +org.apache.stanbol.entityhub.yard.id="stwIndex" +http://stanbol.apache.org/ontology/entityhub/entityhub#entityRank="http://stanbol.apache.org/ontology/entityhub/entityhub#entityRank" +org.apache.stanbol.entityhub.yard.description="Full\ local\ index\ for\ the\ Referenced\ Site\ \"stw\"." 
diff --git a/data/site-stw/src/main/resources/config/stw.solrindex.ref b/data/site-stw/src/main/resources/config/stw.solrindex.ref new file mode 100755 index 0000000..3ec6cde --- /dev/null +++ b/data/site-stw/src/main/resources/config/stw.solrindex.ref @@ -0,0 +1,5 @@ +#Mon Jun 29 10:25:35 CEST 2015 +Name=SolrIndex for stw +Synchronized=true +Description=Standard-Thesaurus Wirtschaft +Index-Archive=stw.solrindex.zip diff --git a/data/site-stw/src/main/resources/site/stw.solrindex.zip b/data/site-stw/src/main/resources/site/stw.solrindex.zip new file mode 100644 index 0000000..980d1cd Binary files /dev/null and b/data/site-stw/src/main/resources/site/stw.solrindex.zip differ diff --git a/data/site-thesoz/pom.xml b/data/site-thesoz/pom.xml new file mode 100644 index 0000000..4cf8b56 --- /dev/null +++ b/data/site-thesoz/pom.xml @@ -0,0 +1,63 @@ + + + + + 4.0.0 + + + eu.fusepool.p3.stanbol-launcher + stanbol-launcher-reactor + 1.0.0-SNAPSHOT + ../.. + + + stanbol-data-site-thesoz + bundle + + Thesaurus for the Social Sciences + + Provides the Thesaurus for the Social Sciences for entity linking + + + 2015 + + + site + config + + + + + + org.apache.felix + maven-bundle-plugin + true + + + ${data.path} + + -100 + + ${config.path} + + + + + + + diff --git a/data/site-thesoz/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-thesoz_linking.config b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-thesoz_linking.config new file mode 100644 index 0000000..2ee1bee --- /dev/null +++ b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-thesoz_linking.config @@ -0,0 +1,3 @@ +stanbol.enhancer.chain.weighted.chain=["langdetect","opennlp-sentence","opennlp-token","opennlp-pos","thesoz-linking","text-annotation-new-model","fise2fam"] +stanbol.enhancer.chain.chainproperties=[""] +stanbol.enhancer.chain.name="thesoz-linking" diff --git 
a/data/site-thesoz/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-thesoz_plain_linking.config b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-thesoz_plain_linking.config new file mode 100644 index 0000000..15cfb0c --- /dev/null +++ b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-thesoz_plain_linking.config @@ -0,0 +1,3 @@ +stanbol.enhancer.chain.weighted.chain=["langdetect","opennlp-sentence","opennlp-token","thesoz-plain-linking","text-annotation-new-model","fise2fam"] +stanbol.enhancer.chain.chainproperties=[""] +stanbol.enhancer.chain.name="thesoz-plain-linking" diff --git a/data/site-thesoz/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent-thesoz_linking.config b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent-thesoz_linking.config new file mode 100644 index 0000000..95c2766 --- /dev/null +++ b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent-thesoz_linking.config @@ -0,0 +1,17 @@ +enhancer.engines.linking.includeSimilarScore=B"true" +enhancer.engines.linking.lucenefst.fstThreadPoolSize=I"1" +enhancer.engines.linking.lucenefst.rankingField="entityhub:entityRank" +enhancer.engines.linking.caseSensitive=B"false" +enhancer.engines.linking.lucenefst.fieldEncoding="SolrYard" +enhancer.engines.linking.entityTypes=[""] +enhancer.engines.linking.suggestions=I"3" +enhancer.engines.linking.defaultMatchingLanguage="" +stanbol.enhancer.engine.name="thesoz-linking" +enhancer.engines.linking.lucenefst.entityCacheSize=I"65536" +enhancer.engines.linking.lucenefst.fstfolder="${solr-data-dir}/fst" +enhancer.engines.linking.lucenefst.fstconfig=["*;field\=rdfs:label;generate\=false"] 
+enhancer.engines.linking.lucenefst.solrcore="thesoz" +enhancer.engines.linking.lucenefst.typeField="rdf:type" +enhancer.engines.linking.typeMappings=["http://lod.gesis.org/thesoz/ext/Classification\ >\ skos:Concept","http://lod.gesis.org/thesoz/ext/Classification"] +enhancer.engines.linking.processedLanguages=["*;lmmtip;uc\=LINK;prob\=0.75;pprob\=0.75","de;uc\=MATCH","es;lc\=Noun","nl;lc\=Noun"] +enhancer.engines.linking.properNounsState=B"false" diff --git a/data/site-thesoz/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet-thesoz_linking.config b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet-thesoz_linking.config new file mode 100644 index 0000000..96f50d5 --- /dev/null +++ b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet-thesoz_linking.config @@ -0,0 +1,15 @@ +enhancer.engines.linking.includeSimilarScore=B"true" +enhancer.engines.linking.lucenefst.fstThreadPoolSize=I"1" +enhancer.engines.linking.lucenefst.rankingField="entityhub:entityRank" +enhancer.engines.linking.caseSensitive=B"false" +enhancer.engines.linking.lucenefst.fieldEncoding="SolrYard" +enhancer.engines.linking.entityTypes=[""] +enhancer.engines.linking.suggestions=I"3" +enhancer.engines.linking.defaultMatchingLanguage="" +stanbol.enhancer.engine.name="thesoz-plain-linking" +enhancer.engines.linking.lucenefst.entityCacheSize=I"65536" +enhancer.engines.linking.lucenefst.fstfolder="${solr-data-dir}/fst" +enhancer.engines.linking.lucenefst.fstconfig=["*;field\=rdfs:label;generate\=false"] +enhancer.engines.linking.lucenefst.solrcore="thesoz" +enhancer.engines.linking.lucenefst.typeField="rdf:type" +enhancer.engines.linking.typeMappings=["http://lod.gesis.org/thesoz/ext/Classification\ >\ skos:Concept","http://lod.gesis.org/thesoz/ext/Classification"] diff --git 
a/data/site-thesoz/src/main/resources/config/org.apache.stanbol.entityhub.core.site.CacheImpl-thesoz.config b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.entityhub.core.site.CacheImpl-thesoz.config new file mode 100644 index 0000000..a359c69 --- /dev/null +++ b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.entityhub.core.site.CacheImpl-thesoz.config @@ -0,0 +1,4 @@ +org.apache.stanbol.entityhub.yard.name="thesoz\ Cache" +org.apache.stanbol.entityhub.yard.cacheYardId="thesozIndex" +org.apache.stanbol.entityhub.yard.id="thesozIndex" +org.apache.stanbol.entityhub.yard.description="Cache\ for\ the\ thesoz\ Referenced\ Site\ using\ the\ thesozIndex." diff --git a/data/site-thesoz/src/main/resources/config/org.apache.stanbol.entityhub.site.referencedSite-thesoz.config b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.entityhub.site.referencedSite-thesoz.config new file mode 100644 index 0000000..abc5466 --- /dev/null +++ b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.entityhub.site.referencedSite-thesoz.config @@ -0,0 +1,14 @@ +org.apache.stanbol.entityhub.site.licenseName=["Creative\ Commons\ Attribution-Noncommercial-No\ Derivative\ Works\ 3.0\ Germany\ License"] +org.apache.stanbol.entityhub.site.defaultExpireDuration=I"0" +org.apache.stanbol.entityhub.site.licenseUrl=["http://creativecommons.org/licenses/by-nc-nd/3.0/de/"] +org.apache.stanbol.entityhub.site.attributionUrl="http://www.gesis.org/en/services/research/thesauri-und-klassifikationen/social-science-thesaurus/" +org.apache.stanbol.entityhub.site.cacheId="thesozIndex" +org.apache.stanbol.entityhub.site.defaultSymbolState="proposed" +org.apache.stanbol.entityhub.site.name="thesoz" +org.apache.stanbol.entityhub.site.entityPrefix=["http://lod.gesis.org/thesoz/"] +org.apache.stanbol.entityhub.site.id="thesoz" +org.apache.stanbol.entityhub.site.description="Thesaurus\ for\ the\ Social\ Sciences" 
+org.apache.stanbol.entityhub.site.attribution="Thesaurus\ for\ the\ Social\ Sciences" +org.apache.stanbol.entityhub.site.defaultMappedEntityState="proposed" +org.apache.stanbol.entityhub.site.fieldMappings=("#\ Licensed\ to\ the\ Apache\ Software\ Foundation\ (ASF)\ under\ one\ or\ more","#\ contributor\ license\ agreements.\ \ See\ the\ NOTICE\ file\ distributed\ with","#\ this\ work\ for\ additional\ information\ regarding\ copyright\ ownership.","#\ The\ ASF\ licenses\ this\ file\ to\ You\ under\ the\ Apache\ License,\ Version\ 2.0","#\ (the\ \"License\");\ you\ may\ not\ use\ this\ file\ except\ in\ compliance\ with","#\ the\ License.\ \ You\ may\ obtain\ a\ copy\ of\ the\ License\ at","#","#\ \ \ \ \ http://www.apache.org/licenses/LICENSE-2.0","#","#\ Unless\ required\ by\ applicable\ law\ or\ agreed\ to\ in\ writing,\ software","#\ distributed\ under\ the\ License\ is\ distributed\ on\ an\ \"AS\ IS\"\ BASIS,","#\ WITHOUT\ WARRANTIES\ OR\ CONDITIONS\ OF\ ANY\ KIND,\ either\ express\ or\ implied.","#\ See\ the\ License\ for\ the\ specific\ language\ governing\ permissions\ and","#\ limitations\ under\ the\ License.","#","#NOTE:\ THIS\ IS\ A\ DEFAULT\ MAPPING\ SPECIFICATION\ THAT\ INCLUDES\ MAPPINGS\ FOR","#\ \ \ \ \ \ COMMON\ ONTOLOGIES.\ USERS\ MIGHT\ WANT\ TO\ ADAPT\ THIS\ CONFIGURATION\ BY","#\ \ \ \ \ \ COMMENTING/UNCOMMENTING\ AND/OR\ ADDING\ NEW\ MAPPINGS","","#\ ---\ Define\ the\ Languages\ for\ all\ fields\ ---","#\ to\ restrict\ languages\ to\ be\ imported\ (for\ all\ fields)","#|\ @\=null;en;de;fr;it","","#NOTE:\ null\ is\ used\ to\ import\ labels\ with\ no\ specified\ language","","#\ ---\ Define\ the\ Languages\ for\ all\ fields\ ---","#\ Uncomment\ to\ restrict\ indexing\ to\ a\ specific\ list\ of\ languages,\ otherwise\ all","#\ languages\ are\ indexed","#|\ @\=null;en;de;fr;it","","#\ ---\ RDF\ RDFS\ and\ OWL\ Mappings\ ---","#\ This\ configuration\ only\ index\ properties\ that\ are\ typically\ used\ to\ store","#\ instance\ data\ defined\ by\ 
such\ namespaces.\ This\ excludes\ ontology\ definitions","","#\ NOTE\ that\ nearly\ all\ other\ ontologies\ are\ using\ properties\ of\ these\ three","#\ \ \ \ \ \ schemas,\ therefore\ it\ is\ strongly\ recommended\ to\ include\ such\ information!","","rdf:type\ |\ d\=entityhub:ref","","rdfs:label\ ","rdfs:comment","rdfs:seeAlso\ |\ d\=entityhub:ref","","","owl:sameAs\ |\ d\=entityhub:ref","","#If\ one\ likes\ to\ also\ index\ ontologies\ one\ should\ add\ the\ following\ statements","#owl:*","#rdfs:*","","#\ ---\ Dublin\ Core\ (DC)\ ---","#\ The\ default\ configuration\ imports\ all\ dc-terms\ data\ and\ copies\ values\ for\ the","#\ old\ dc-elements\ standard\ over\ to\ the\ according\ properties\ of\ the\ dc-terms","#\ standard.","","#\ NOTE\ that\ a\ lot\ of\ other\ ontologies\ are\ also\ using\ DC\ for\ some\ of\ their\ data","#\ \ \ \ \ \ therefore\ it\ is\ strongly\ recommended\ to\ include\ such\ information!","","#mapping\ for\ all\ dc-terms\ properties","dc:*","","#\ copy\ dc:title\ to\ rdfs:label","dc:title\ >\ rdfs:label","","#\ deactivated\ by\ default,\ because\ such\ mappings\ are\ mapped\ to\ dc-terms","#dc-elements:*","","#\ mappings\ for\ the\ dc-elements\ properties\ to\ the\ dc-terms","dc-elements:contributor\ >\ dc:contributor","dc-elements:coverage\ >\ dc:coverage","dc-elements:creator\ >\ dc:creator","dc-elements:date\ >\ dc:date","dc-elements:description\ >\ dc:description","dc-elements:format\ >\ dc:format","dc-elements:identifier\ >\ dc:identifier","dc-elements:language\ >\ dc:language","dc-elements:publisher\ >\ dc:publisher","dc-elements:relation\ >\ dc:relation","dc-elements:rights\ >\ dc:rights","dc-elements:source\ >\ dc:source","dc-elements:subject\ >\ dc:subject","dc-elements:title\ >\ dc:title","dc-elements:type\ >\ dc:type","#also\ use\ dc-elements:title\ as\ label","dc-elements:title\ >\ rdfs:label","","#\ ---\ Social\ Networks\ (via\ foaf)\ ---","#The\ Friend\ of\ a\ Friend\ schema\ is\ often\ used\ to\ describe\ social\ 
relations\ between\ people","foaf:*","","#\ copy\ the\ name\ of\ a\ person\ over\ to\ rdfs:label","foaf:name\ >\ rdfs:label","","#\ additional\ data\ types\ checks","foaf:knows\ |\ d\=entityhub:ref","foaf:made\ |\ d\=entityhub:ref","foaf:maker\ |\ d\=entityhub:ref","foaf:member\ |\ d\=entityhub:ref","foaf:homepage\ |\ d\=xsd:anyURI","foaf:depiction\ |\ d\=xsd:anyURI","foaf:img\ |\ d\=xsd:anyURI","foaf:logo\ |\ d\=xsd:anyURI","#page\ about\ the\ entity","foaf:page\ |\ d\=xsd:anyURI","","","#\ ---\ Schema.org\ --","","#\ Defines\ an\ Ontology\ used\ by\ search\ engines\ (Google,\ Yahoo\ and\ Bing)\ for\ ","#\ indexing\ websites.","","schema:*","#\ Copy\ all\ names\ of\ schema\ instances\ over\ to\ rdfs:label","schema:name\ >\ rdfs:label","","#\ ---\ Simple\ Knowledge\ Organization\ System\ (SKOS)\ ---","","#\ A\ common\ data\ model\ for\ sharing\ and\ linking\ knowledge\ organization\ systems\ ","#\ via\ the\ Semantic\ Web.\ Typically\ used\ to\ encode\ controlled\ vocabularies\ as","#\ a\ thesaurus\ \ ","skos:*","","#\ copy\ all\ SKOS\ labels\ (preferred,\ alternative\ and\ hidden)\ over\ to\ rdfs:label","skos:prefLabel\ >\ rdfs:label","skos:altLabel\ >\ rdfs:label","skos:hiddenLabel\ >\ rdfs:label","","#\ copy\ values\ of\ **Match\ relations\ to\ the\ according\ related,\ broader\ and\ narrower","skos:relatedMatch\ >\ skos:related","skos:broadMatch\ >\ skos:broader","skos:narrowMatch\ >\ skos:narrower","","#similar\ mappings\ for\ transitive\ variants\ are\ not\ contained,\ because\ transitive","#reasoning\ is\ not\ directly\ supported\ by\ the\ Entityhub.","","#\ Some\ SKOS\ thesaurus\ do\ use\ \"skos:transitiveBroader\"\ and\ \"skos:transitiveNarrower\"","#\ however\ such\ properties\ are\ only\ intended\ to\ be\ used\ by\ reasoners\ to","#\ calculate\ transitive\ closures\ over\ broader/narrower\ hierarchies.","#\ see\ http://www.w3.org/TR/skos-reference/#L2413\ for\ details","#\ to\ correct\ such\ cases\ we\ will\ copy\ transitive\ relations\ to\ their\ 
counterpart","skos:narrowerTransitive\ >\ skos:narrower","skos:broaderTransitive\ >\ skos:broader","","","#\ ---\ Semantically-Interlinked\ Online\ Communities\ (SIOC)\ ---","","#\ An\ ontology\ for\ describing\ the\ information\ in\ online\ communities.\ ","#\ This\ information\ can\ be\ used\ to\ export\ information\ from\ online\ communities\ ","#\ and\ to\ link\ them\ together.\ The\ scope\ of\ the\ application\ areas\ that\ SIOC\ can\ ","#\ be\ used\ for\ includes\ (and\ is\ not\ limited\ to)\ weblogs,\ message\ boards,\ ","#\ mailing\ lists\ and\ chat\ channels.","sioc:*","","#\ ---\ biographical\ information\ (bio)","#\ A\ vocabulary\ for\ describing\ biographical\ information\ about\ people,\ both\ living","#\ and\ dead.\ (see\ http://vocab.org/bio/0.1/)","bio:*","","#\ ---\ Rich\ Site\ Summary\ (rss)\ ---","rss:*","","#\ ---\ GoodRelations\ (gr)\ ---","#\ GoodRelations\ is\ a\ standardised\ vocabulary\ for\ product,\ price,\ and\ company\ data","gr:*","","#\ ---\ Creative\ Commons\ Rights\ Expression\ Language\ (cc)","#\ The\ Creative\ Commons\ Rights\ Expression\ Language\ (CC\ REL)\ lets\ you\ describe\ ","#\ copyright\ licenses\ in\ RDF.","cc:*","","","","","","") +org.apache.stanbol.entityhub.site.cacheStrategy="all" diff --git a/data/site-thesoz/src/main/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-thesoz.config b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-thesoz.config new file mode 100644 index 0000000..3a37c5a --- /dev/null +++ b/data/site-thesoz/src/main/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-thesoz.config @@ -0,0 +1,7 @@ +org.apache.stanbol.entityhub.yard.solr.solrUri="thesoz" +org.apache.stanbol.entityhub.yard.name="thesoz\ Index" +org.apache.stanbol.entityhub.yard.solr.multiYardIndexLayout=B"false" +org.apache.stanbol.entityhub.yard.solr.useDefaultConfig=B"false" +org.apache.stanbol.entityhub.yard.id="thesozIndex" 
+http://stanbol.apache.org/ontology/entityhub/entityhub#entityRank="http://stanbol.apache.org/ontology/entityhub/entityhub#entityRank" +org.apache.stanbol.entityhub.yard.description="Full\ local\ index\ for\ the\ Referenced\ Site\ \"thesoz\"." diff --git a/data/site-thesoz/src/main/resources/config/thesoz.solrindex.ref b/data/site-thesoz/src/main/resources/config/thesoz.solrindex.ref new file mode 100644 index 0000000..a78a97f --- /dev/null +++ b/data/site-thesoz/src/main/resources/config/thesoz.solrindex.ref @@ -0,0 +1,5 @@ +#Mon Jun 29 10:35:28 CEST 2015 +Name=SolrIndex for thesoz +Synchronized=true +Description=Thesaurus for the Social Sciences +Index-Archive=thesoz.solrindex.zip diff --git a/data/site-thesoz/src/main/resources/site/thesoz.solrindex.zip b/data/site-thesoz/src/main/resources/site/thesoz.solrindex.zip new file mode 100644 index 0000000..7460fc3 Binary files /dev/null and b/data/site-thesoz/src/main/resources/site/thesoz.solrindex.zip differ diff --git a/launcher/core/pom.xml b/launcher/core/pom.xml index 023500a..4be3619 100644 --- a/launcher/core/pom.xml +++ b/launcher/core/pom.xml @@ -165,8 +165,18 @@ partialbundlelist provided --> - - + + + + + org.apache.clerezza.provisioning diff --git a/launcher/default/pom.xml b/launcher/default/pom.xml index 4bf17c6..0752203 100644 --- a/launcher/default/pom.xml +++ b/launcher/default/pom.xml @@ -167,8 +167,16 @@ partialbundlelist provided + + + eu.fusepool.p3.stanbol-launcher + stanbol-launcher-ld-sites-bundlelist + ${project.version} + partialbundlelist + provided + - + org.apache.clerezza.provisioning diff --git a/launcher/default/src/main/config/org.apache.stanbol.enhancer.chain.allactive.impl.DefaultChain.config b/launcher/default/src/main/config/org.apache.stanbol.enhancer.chain.allactive.impl.DefaultChain.config new file mode 100644 index 0000000..3c69400 --- /dev/null +++ b/launcher/default/src/main/config/org.apache.stanbol.enhancer.chain.allactive.impl.DefaultChain.config @@ -0,0 +1,3 @@ +# 
Configuration created by Apache Sling File Installer +stanbol.enhancer.chain.default.enabled=B"false" +stanbol.enhancer.chain.default.name="all-active" diff --git a/pom.xml b/pom.xml index c86d11d..73805ae 100644 --- a/pom.xml +++ b/pom.xml @@ -62,14 +62,21 @@ stanbol-enhancer-adapter datatxt-stanbol - + + data/opennlp-pos data/opennlp-ner data/ixa-nerc + + data/site-iptc + data/site-stw + data/site-thesoz + bundlelist/fusepool bundlelist/nlp-models + bundlelist/ld-sites launcher/core