ref_all.bib

% Journal article. Authors are written "Last, First" so the family name alone drives sorting and labels.
@Article{ citeulike:6545194,
	abstract = "Web documents are available in various forms, most of which do not carry additional semantics. This paper presents a model for general document metadata extraction. The model, which combines segmentation by keywords and pattern matching techniques, was implemented using PHP, MySQL, JavaScript and HTML. The system was tested with 40 randomly selected PDF documents (mainly theses). An evaluation of the system was done using standard criteria measures namely precision, recall, accuracy and F-measure. The results show that the model is relatively effective for the task of metadata extraction, especially for theses and dissertations. A combination of machine learning with these rule-based methods will be explored in the future for better results.",
	author = "Ojokoh, Bolanle Adefowoke and Adewale, Olumide Sunday and Falaki, Samuel Oluwole",
	citeulike-article-id = "6545194",
	citeulike-linkout-0 = "http://dx.doi.org/10.1177/0165551509105195",
	citeulike-linkout-1 = "http://jis.sagepub.com/cgi/content/abstract/35/5/563",
	day = "1",
	doi = "10.1177/0165551509105195",
	journal = "Journal of Information Science",
	month = oct,
	number = "5",
	pages = "563--570",
	posted-at = "2010-01-15 20:48:09",
	priority = "2",
	title = "Automated document metadata extraction",
	url = "http://dx.doi.org/10.1177/0165551509105195",
	volume = "35",
	year = "2009"
}

% Book. The series name lives in its own field rather than in the title; the abstract is plain text without HTML.
@Book{ citeulike:273980,
	abstract = "{The development of the Semantic Web, with machine-readable content, has the potential to revolutionize the World Wide Web and its use. A Semantic Web Primer provides an introduction and guide to this emerging field, describing its key ideas, languages, and technologies. Suitable for use as a textbook or for self-study by professionals, it concentrates on undergraduate-level fundamental concepts and techniques that will enable readers to proceed with building applications on their own. It includes exercises, project descriptions, and annotated references to relevant online materials. A Semantic Web Primer is the only available book on the Semantic Web to include a systematic treatment of the different languages (XML, RDF, OWL, and rules) and technologies (explicit metadata, ontologies, and logic and inference) that are central to Semantic Web development. The book also examines such crucial related topics as ontology engineering and application scenarios. After an introductory chapter, topics covered in succeeding chapters include XML and related technologies that support semantic interoperability; RDF and RDF Schema, the standard data model for machine-processable semantics; and OWL, the W3C-approved standard for a Web ontology language more extensive than RDF Schema; rules, both monotonic and nonmonotonic, in the framework of the Semantic Web; selected application domains and how the Semantic Web would benefit them; the development of ontology-based systems; and current debates on key issues and predictions for the future.}",
	author = "Antoniou, Grigoris and van Harmelen, Frank",
	citeulike-article-id = "273980",
	howpublished = "Hardcover",
	isbn = "0262012103",
	keywords = "semantic\_web",
	month = apr,
	posted-at = "2009-06-30 19:51:07",
	priority = "2",
	publisher = "{The MIT Press}",
	series = "Cooperative Information Systems",
	title = "A Semantic Web Primer",
	url = "http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/0262012103",
	year = "2004"
}

% No venue is recorded for this paper, so it is typed Misc (Article would require a journal).
% TODO(review): confirm the publication venue and retype the entry accordingly.
@Misc{ citeulike:4149298,
	abstract = "In the age of digital information more and more digital libraries and historical archives are using information systems in order to facilitate the document retrieval and provide better visualization of the search results and document presentation. Much research has been done in the field of digital libraries, but in the case of historical archives, which have particular needs, this is not the case. To this end, we investigate the use of new tools, which are based on the ontology of the historical archive in order to provide a new and effective method for document retrieval in a dynamic environment which will take into account the collaboration needs of the users.",
	author = "Katifori, Akrivi and Golemati, Maria",
	citeulike-article-id = "4149298",
	keywords = "library, ontology",
	posted-at = "2009-03-08 16:44:53",
	priority = "2",
	title = "Ontology Aided Information Retrieval in Digital Historical Archives",
	url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.130.6092",
	year = "2007"
}

% Journal article. The abstract is brace-delimited because it contains double quotes.
@Article{ citeulike:7057306,
	abstract = {A new approach to scientometric indicators which is based on frequency distribution characteristics is presented. The publication productivity of ten major OECD countries during the 1981--1985 period is studied. A stochastic "cumulative advantage" process having a Waring-type stationary limit distribution is assumed to underlie the publication process. This model makes it possible to estimate such indicators as the "Publication Potential" and the "Cumulative Advantage Coefficient" which indicate the number of potential and of factual authors and the effect of the "success-breeds-success" phenomenon, respectively. A discussion of the publication behaviour of the scientific elite based on indicators of the distribution tail concludes the comparative evaluation.},
	author = "Braun, T. and Gl{\"a}nzel, W. and Schubert, A.",
	citeulike-article-id = "7057306",
	citeulike-linkout-0 = "http://dx.doi.org/10.1177/016555159001600107",
	citeulike-linkout-1 = "http://jis.sagepub.com/cgi/content/abstract/16/1/37",
	day = "1",
	doi = "10.1177/016555159001600107",
	journal = "Journal of Information Science",
	keywords = "frequency\_distribution, lotka",
	month = jan,
	number = "1",
	pages = "37--44",
	posted-at = "2010-04-22 09:16:52",
	priority = "2",
	title = "Publication productivity: from frequency distributions to scientometric indicators",
	url = "http://dx.doi.org/10.1177/016555159001600107",
	volume = "16",
	year = "1990"
}

% Journal article. Braces in the title keep "Dublin Core" and "OAI-PMH" capitalised under sentence-casing styles.
@Article{ citeulike:6551067,
	abstract = "This article will focus on how two different metadata harvesters---OAIster and the Online Computer Library Center's (OCLC) WorldCat---transform and present Dublin Core metadata extracted from CONTENTdm. It offers an examination, in plain language, of what two service providers do to metadata once they are harvested, and, in a case study, shows examples of how specific records display in both the local and aggregated interfaces. By helping metadata creators understand what happens to their metadata as it is harvested and transformed, this article aims to assist them in designing their metadata to be intelligible and useful to end-users across platforms.",
	author = "Beisler, Amalia and Willis, Glee",
	citeulike-article-id = "6551067",
	citeulike-linkout-0 = "http://dx.doi.org/10.1080/19386380903095099",
	doi = "10.1080/19386380903095099",
	journal = "Journal of Library Metadata",
	keywords = "metadata, oai",
	number = "1",
	pages = "65--97",
	posted-at = "2010-01-16 17:08:22",
	priority = "2",
	publisher = "Routledge",
	title = "Beyond Theory: Preparing {Dublin Core} Metadata for {OAI-PMH} Harvesting",
	url = "http://dx.doi.org/10.1080/19386380903095099",
	volume = "9",
	year = "2009",
	file = "{:references:oai\_ore:beisler.2009.jlm.oai\_pmh\_dublin\_core.pdf|}"
}

% Web-published note. The institution is inferred from the w3.org URL -- TODO(review): confirm.
@TechReport{ citeulike:2567842,
	author = "Berners-Lee, Tim",
	citeulike-article-id = "2567842",
	citeulike-linkout-0 = "http://www.w3.org/DesignIssues/Metadata",
	howpublished = "http://www.w3.org/DesignIssues/Metadata",
	institution = "World Wide Web Consortium ({W3C})",
	posted-at = "2010-01-16 01:59:55",
	priority = "2",
	title = "Metadata Architecture",
	url = "http://www.w3.org/DesignIssues/Metadata",
	year = "1997"
}

% Journal article. Volume and issue are kept in separate fields (volume 21, number 3).
% The spelling "Revisted" in the URLs is part of the address itself and is kept as recorded.
@Article{ citeulike:1810860,
	author = "Berners-Lee, Tim and Shadbolt, Nigel and Hall, Wendy",
	citeulike-article-id = "1810860",
	citeulike-linkout-0 = "http://eprints.ecs.soton.ac.uk/12614/01/Semantic_Web_Revisted.pdf",
	journal = "{IEEE} Intelligent Systems",
	keywords = "semantic\_web",
	month = may,
	number = "3",
	pages = "96--101",
	posted-at = "2010-01-16 02:06:30",
	priority = "2",
	title = "The {Semantic Web} Revisited",
	url = "http://eprints.ecs.soton.ac.uk/12614/01/Semantic_Web_Revisted.pdf",
	volume = "21",
	year = "2006"
}

% Journal article. Bibliographic fields come first, CiteULike bookkeeping fields last.
@Article{ citeulike:850436,
	author = {Bruza, P. D. and Song, D. W. and Wong, K. F.},
	title = {Aboutness from a Commonsense Perspective},
	journal = {Journal of the American Society for Information Science},
	volume = {51},
	number = {12},
	pages = {1090--1105},
	year = {2000},
	citeulike-article-id = {850436},
	posted-at = {2010-01-15 16:57:19},
	priority = {2}
}

% Chapter in a collected volume; the containing volume is given in booktitle.
% Publisher is inferred from the 10.1007 DOI prefix and the springerlink URL -- TODO(review): confirm.
@InCollection{ citeulike:6071587,
	abstract = "The mathematical concept of document resemblance captures well the informal notion of syntactic similarity. The resemblance can be estimated using a fixed size ``sketch'' for each document. For a large collection of documents (say hundreds of millions) the size of this sketch is of the order of a few hundred bytes per document. However, for efficient large scale web indexing it is not necessary to determine the actual resemblance value: it suffices to determine whether newly encountered documents are duplicates or near-duplicates of documents already indexed. In other words, it suffices to determine whether the resemblance is above a certain threshold. In this talk we show how this determination can be made using a ``sample'' of less than 50 bytes per document. The basic approach for computing resemblance has two aspects: first, resemblance is expressed as a set (of strings) intersection problem, and second, the relative size of intersections is evaluated by a process of random sampling that can be done independently for each document. The process of estimating the relative size of intersection of sets and the threshold test discussed above can be applied to arbitrary sets, and thus might be of independent interest. The algorithm for filtering near-duplicate documents discussed here has been successfully implemented and has been used for the last three years in the context of the AltaVista search engine.",
	author = "Broder, Andrei",
	booktitle = "Combinatorial Pattern Matching",
	citeulike-article-id = "6071587",
	citeulike-linkout-0 = "http://dx.doi.org/10.1007/3-540-45123-4_1",
	citeulike-linkout-1 = "http://www.springerlink.com/content/ktn21yjul3r379xy",
	doi = "10.1007/3-540-45123-4_1",
	pages = "1--10",
	posted-at = "2010-01-15 14:18:25",
	priority = "2",
	publisher = "Springer",
	title = "Identifying and Filtering Near-Duplicate Documents",
	url = "http://dx.doi.org/10.1007/3-540-45123-4_1",
	year = "2000"
}

% Book, second edition.
% NOTE(review): the listed people may be the volume's editors rather than authors -- confirm before moving them to an editor field.
@Book{ citeulike:3090157,
	citeulike-article-id = "3090157",
	edition = "Second",
	author = "Baader, Franz and Calvanese, Diego and McGuinness, Deborah L. and Nardi, Daniele and Patel-Schneider, Peter F.",
	keywords = "description\_logic",
	posted-at = "2009-06-30 20:36:54",
	priority = "2",
	publisher = "Cambridge University Press",
	title = "The Description Logic Handbook",
	year = "2007"
}

% Conference paper. The booktitle is inferred from the dcpapers.dublincore.org and dc2003 URLs.
% TODO(review): confirm the exact proceedings title.
@InProceedings{ citeulike:1126745,
	abstract = "This paper challenges some of the assumptions underlying the metadata creation process in the context of two communities of practice, based around learning object repositories and open e-Print archives. The importance of quality assurance for metadata creation is discussed and evidence from the literature, from the practical experiences of repositories and archives, and from related research and practices within other communities is presented. Issues for debate and further investigation are identified, formulated as a series of key research questions. Although there is much work to be done in the area of quality assurance for metadata creation, this paper represents an important first step towards a fuller understanding of the subject.",
	author = "Barton, Jane and Currier, Sarah and Hey, Jessie M. N.",
	booktitle = "Proceedings of the International Conference on {Dublin Core} and Metadata Applications ({DC-2003})",
	citeulike-article-id = "1126745",
	citeulike-linkout-0 = "http://dcpapers.dublincore.org/ojs/pubs/article/view/732/728",
	citeulike-linkout-1 = "http://www.siderean.com/dc2003/201_paper60.pdf",
	keywords = "metadata\_creation, metadata\_quality",
	pages = "29--36",
	posted-at = "2010-01-07 13:53:50",
	priority = "0",
	publisher = "DCMI",
	title = "Building Quality Assurance into Metadata Creation: an Analysis based on the Learning Objects and e-Prints Communities of Practice",
	url = "http://dcpapers.dublincore.org/ojs/pubs/article/view/732/728",
	year = "2003"
}

% Chapter in a collected volume (the DOI carries a book ISBN), so the containing title is a booktitle, not a journal.
% Publisher is inferred from the 10.1007 DOI prefix -- TODO(review): confirm.
@InCollection{ citeulike:3857646,
	abstract = "Findings from a data mapping and extraction exercise undertaken as part of the STAR project are described and related to recent work in the area. The exercise was undertaken in conjunction with English Heritage and encompassed five differently structured relational databases containing various results of archaeological excavations. The aim of the exercise was to demonstrate the potential benefits in cross searching data expressed as RDF and conforming to a common overarching conceptual data structure schema - the English Heritage Centre for Archaeology ontological model (CRM-EH), an extension of the CIDOC Conceptual Reference Model (CRM). A semi-automatic mapping/extraction tool proved an essential component. The viability of the approach is demonstrated by web services and a client application on an integrated data and concept network.",
	author = "Binding, Ceri and May, Keith and Tudhope, Douglas",
	booktitle = "Research and Advanced Technology for Digital Libraries",
	citeulike-article-id = "3857646",
	doi = "10.1007/978-3-540-87599-4_30",
	keywords = "cidoc\_crm, semantic\_interoperability",
	pages = "280--290",
	posted-at = "2009-06-30 16:14:44",
	priority = "2",
	publisher = "Springer",
	title = "Semantic Interoperability in Archaeological Datasets: Data Mapping and Extraction Via the {CIDOC} {CRM}",
	url = "http://dx.doi.org/10.1007/978-3-540-87599-4_30",
	year = "2008"
}

% Journal article. The doi field holds the bare identifier; resolver URLs are built from it without a "DOI:" prefix.
@Article{ citeulike:5730605,
	author = "Black, Alistair",
	citeulike-article-id = "5730605",
	citeulike-linkout-0 = "http://dx.doi.org/10.1002/aris.1440400118",
	doi = "10.1002/aris.1440400118",
	journal = "Annual Review of Information Science \& Technology (ARIST)",
	keywords = "information\_history",
	pages = "441--473",
	posted-at = "2009-09-07 02:47:02",
	priority = "0",
	title = "Information history",
	url = "http://dx.doi.org/10.1002/aris.1440400118",
	volume = "40",
	year = "2006",
	file = "{:references:phd1st:glis702:information\_history.pdf|}"
}

% Journal article. The author's family name is Rayward ("W. Boyd" are given names).
% The doi field holds the bare identifier, without a "doi:" prefix.
@Article{ citeulike:5730696,
	author = "Rayward, W. Boyd",
	citeulike-article-id = "5730696",
	citeulike-linkout-0 = "http://dx.doi.org/10.1016/0306-4573(95)00046-J",
	doi = "10.1016/0306-4573(95)00046-J",
	issn = "0306-4573",
	journal = "Information Processing and Management",
	keywords = "information\_history",
	number = "1",
	pages = "3--17",
	posted-at = "2009-09-07 03:22:17",
	priority = "0",
	title = "The history and historiography of information science: Some reflections",
	url = "http://dx.doi.org/10.1016/0306-4573(95)00046-J",
	volume = "32",
	year = "1996",
	file = "{:references:phd1st:glis702:boyd.information.history.pdf|}"
}

% Book. The series name lives in its own field rather than in the title; the abstract is plain text without HTML.
@Book{ citeulike:290835,
	abstract = "{Knowledge representation is at the very core of a radical idea for understanding intelligence. Instead of trying to understand or build brains from the bottom up, its goal is to understand and build intelligent behavior from the top down, putting the focus on what an agent needs to know in order to behave intelligently, how this knowledge can be represented symbolically, and how automated reasoning procedures can make this knowledge available as needed. This landmark text takes the central concepts of knowledge representation developed over the last 50 years and illustrates them in a lucid and compelling way. Each of the various styles of representation is presented in a simple and intuitive form, and the basics of reasoning with that representation are explained in detail. This approach gives readers a solid foundation for understanding the more advanced work found in the research literature. The presentation is clear enough to be accessible to a broad audience, including researchers and practitioners in database management, information retrieval, and object-oriented systems as well as artificial intelligence. This book provides the foundation in knowledge representation and reasoning that every AI practitioner needs. * Authors are well-recognized experts in the field who have applied the techniques to real-world problems. * Presents the core ideas of KR\&R in a simple straight forward approach, independent of the quirks of research systems. * Offers the first true synthesis of the field in over a decade.}",
	author = "Brachman, Ronald and Levesque, Hector",
	citeulike-article-id = "290835",
	howpublished = "Hardcover",
	isbn = "1558609326",
	keywords = "ai",
	month = may,
	posted-at = "2009-06-30 20:42:40",
	priority = "2",
	publisher = "Morgan Kaufmann",
	series = "The Morgan Kaufmann Series in Artificial Intelligence",
	title = "Knowledge Representation and Reasoning",
	url = "http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/1558609326",
	year = "2004"
}

% No venue is recorded for this paper (only a repository URL), so it is typed Misc (Article would require a journal).
% TODO(review): confirm the publication venue and retype the entry accordingly.
@Misc{ citeulike:5024113,
	abstract = "The quality of the metadata records in a digital library has a critical effect on its information access and retrieval. The open source Metadata Repository at the National Science Digital Library (NSDL) comprises of collections of metadata submitted from various data providers and is open for harvesting by the public. Since metadata in the repository came from many different data providers, there is a lack of consistency. This lack of consistency hampers the information services provided by the NSDL to its own web users as well as to aggregators who harvest the NSDL data. The goal of this study is to assess the quality of the current metadata records in the NSDL Repository. We harvested over one million Dublin Core metadata records submitted through November 2005 to the repository using the Open Archives Initiative Protocol (OAIP). The data harvested was loaded into an Excel database and exhaustive tabulations of all the Dublin Core metadata fields were performed. The criteria of quality assessment are based on the following areas: consistency, completeness, accuracy and local additions of data providers. This study reports on the results of the tabulations and assessment of metadata quality. Information organizations and institutions will benefit from the results of this study in determining which parts of the repository they aim to harvest. The data collected for this study will be made available to the public to contribute to promoting studies on metadata quality improvement by enabling other researchers to have access to the data for further analysis.",
	author = "Bui, Yen and Park, Jung-Ran",
	citeulike-article-id = "5024113",
	keywords = "metadata, metadata\_quality",
	posted-at = "2009-06-30 17:41:23",
	priority = "2",
	year = "2006",
	pages = "1--13",
	title = "An assessment of metadata quality: A case study of the {National Science Digital Library Metadata Repository}",
	url = "http://idea.library.drexel.edu/handle/1860/1600",
	file = "{:references:metadata.quality:bui.2006.nsdl\_metadata\_quality.pdf|}"
}

% Journal article. Query parameters in the URLs are separated by a plain ampersand, not an HTML entity.
@Article{ citeulike:3861763,
	abstract = "A study to develop a methodology for studying index consistency in machine readable cataloging (MARC) records and to study such consistency in subject cataloging practice between the Library of Congress (LC) and non-LC libraries found that consistency among indexers is difficult to achieve even when the same indexing policies and vocabulary are used.",
	author = "Chan, Lois M.",
	citeulike-article-id = "3861763",
	citeulike-linkout-0 = "http://proquest.umi.com/pqdweb?did=7096175&Fmt=7&clientId=11263&RQT=309&VName=PQD",
	journal = "Information Technology and Libraries",
	keywords = "interindexer\_consistency, metadata, subject\_analysis",
	number = "4",
	pages = "349--358",
	posted-at = "2010-03-14 00:05:03",
	priority = "3",
	title = "Inter-Indexer Consistency in Subject Cataloging",
	url = "http://proquest.umi.com/pqdweb?did=7096175&Fmt=7&clientId=11263&RQT=309&VName=PQD",
	volume = "8",
	year = "1989"
}

% Journal article. The DOI is carried only by the doi and url fields, not by the abstract text.
@Article{ citeulike:6588726,
	abstract = "With the proliferation of Web 2.0 technologies, there is an expanded opportunity for individuals to get involved in information exchange. In this study, the sustainability of a virtual community for teachers and educators was investigated. The research model borrows the key concepts from the IS continuance model, social influence theory, the uses and gratifications paradigm, and relationship marketing to explain user intention to continue using a virtual community, as well as intention to recommend the community to others. Satisfaction, commitment, group norms are found to have significant impacts on intention to continue using and intention to recommend. Among the three factors, satisfaction has the highest impact on behavioral intentions. Individual-related factors (purposive value and self-discovery) are found to have significant impacts on user satisfaction, while social-related factors are more important in determining commitment and group norms. The results of this study provide important implications for both research and practice.",
	author = "Cheung, Christy M. K. and Lee, Matthew K. O.",
	citeulike-article-id = "6588726",
	citeulike-linkout-0 = "http://dx.doi.org/10.1177/0165551508099088",
	citeulike-linkout-1 = "http://jis.sagepub.com/cgi/content/abstract/35/3/279",
	day = "1",
	doi = "10.1177/0165551508099088",
	journal = "Journal of Information Science",
	month = jun,
	number = "3",
	pages = "279--298",
	posted-at = "2010-01-25 19:07:49",
	priority = "2",
	title = "Understanding the sustainability of a virtual community: model development and empirical test",
	url = "http://dx.doi.org/10.1177/0165551508099088",
	volume = "35",
	year = "2009"
}

% Journal article. The key has no colon after "citeulike", unlike its siblings; it is kept as-is because citing documents depend on it.
% "De Giacomo" is a two-word family name and is braced so it is not split. The abstract is truncated as exported.
@Article{ citeulike6581132,
	abstract = "Recent proposals to improve the quality of interaction with the World Wide Web suggest considering the Web as a huge semi-structured database, so that retrieving information can be supported by the task of database querying. Under this view, it is important to represent the form of both the network, and the documents placed in the nodes of the network. However, the current proposals do not pay sufficient attention to represent document structures and reasoning about them. In this paper, we address these problems by providing a framework where Document Type Definitions (DTDs) expressed in the eXtensible Markup Language (XML) are formalized in an expressive Description Logic equipped with sound and complete inference algorithms. We provide methods for verifying conformance of a document to a DTD in polynomial time, and structural equivalence of DTDs in worst case deterministic exponential time, improving known algorithms for this problem which were double exponential. We also deal with pa...",
	author = "Calvanese, Diego and {De Giacomo}, Giuseppe and Lenzerini, Maurizio",
	citeulike-article-id = "6581132",
	citeulike-linkout-0 = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.22.6641",
	journal = "Journal of Logic and Computation",
	keywords = "description\_logic\_approach, document\_structures",
	pages = "295--318",
	posted-at = "2010-01-22 21:51:43",
	priority = "2",
	title = "Representing and Reasoning on {XML} Documents: A Description Logic Approach",
	url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.22.6641",
	volume = "9",
	year = "1999"
}

% Journal article. Bibliographic fields come first, then identifiers, then the abstract and CiteULike bookkeeping fields.
@Article{ citeulike:3861772,
	author = {Cooper, William S.},
	title = {Is interindexer consistency a hobgoblin?},
	journal = {American Documentation},
	volume = {20},
	number = {3},
	pages = {268--278},
	year = {1969},
	doi = {10.1002/asi.4630200314},
	url = {http://dx.doi.org/10.1002/asi.4630200314},
	keywords = {interindexer\_consistency},
	abstract = {It is often assumed that the amount of interindexer consistency experienced under a given method of indexing is somehow indicative of the quality of the indexing. To explore this assumption, two hypotheses are stated concerning the possible connection between interindexer consistency and indexing quality. A specific counter-example is then exhibited which shows both hypotheses to be invalid. Although a mathematical analysis of the counterexample yields certain insights, the general relationship between interindexer consistency and successful retrieval is more subtle than might have been expected. It is concluded that until equations describing this relationship have been derived, measurements of interindexer consistency will have little meaning as clues to indexing quality.},
	citeulike-article-id = {3861772},
	citeulike-linkout-0 = {http://dx.doi.org/10.1002/asi.4630200314},
	posted-at = {2010-01-15 19:55:29},
	priority = {0}
}

% Book. The title follows the one stated in the entry's own abstract.
% TODO(review): confirm the title against the ISBN.
@Book{ citeulike:111664,
	abstract = "{Mining the Web: Discovering Knowledge from Hypertext Data is the first book devoted entirely to techniques for producing knowledge from the vast body of unstructured Web data. Building on an initial survey of infrastructural issues---including Web crawling and indexing---Chakrabarti examines low-level machine learning techniques as they relate specifically to the challenges of Web mining. He then devotes the final part of the book to applications that unite infrastructure and analysis to bring machine learning to bear on systematically acquired and stored data. Here the focus is on results: the strengths and weaknesses of these applications, along with their potential as foundations for further progress. From Chakrabarti's work---painstaking, critical, and forward-looking---readers will gain the theoretical and practical understanding they need to contribute to the Web mining effort. * A comprehensive, critical exploration of statistics-based attempts to make sense of Web Mining. * Details the special challenges associated with analyzing unstructured and semi-structured data. * Looks at how classical Information Retrieval techniques have been modified for use with Web data. * Focuses on today's dominant learning methods: clustering and classification, hyperlink analysis, and supervised and semi-supervised learning. * Analyzes current applications for resource discovery and social network analysis. * An excellent way to introduce students to especially vital applications of data mining and machine learning technology.}",
	author = "Chakrabarti, Soumen",
	citeulike-article-id = "111664",
	howpublished = "Hardcover",
	isbn = "1558607544",
	keywords = "information\_retrieval",
	month = aug,
	posted-at = "2009-06-30 20:15:13",
	priority = "2",
	publisher = "{Morgan Kaufmann}",
	title = "Mining the Web: Discovering Knowledge from Hypertext Data",
	url = "http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/1558607544",
	year = "2002"
}

% Journal article. Only "Part I" is brace-protected in the title, so bibliography styles can still apply their own casing to the rest.
@Article{ citeulike:4867464,
	author = "Chan, Lois M. and Zeng, Marcia L.",
	citeulike-article-id = "4867464",
	doi = "10.1045/june2006-chan",
	journal = "D-Lib Magazine",
	keywords = "interoperability, metadata",
	month = jun,
	number = "6",
	posted-at = "2009-06-30 17:08:16",
	priority = "2",
	title = "Metadata Interoperability and Standardization -- A Study of Methodology {Part I}: Achieving Interoperability at the Schema Level",
	url = "http://dx.doi.org/10.1045/june2006-chan",
	volume = "12",
	year = "2006",
	file = "{:references:metadata:chan.2006.dlib.metadata\_interoperability.pdf|}"
}

% Journal article. Braces in the title keep the system name and "Singapore" capitalised under sentence-casing styles.
@Article{ citeulike:4746682,
	author = "Chaudhry, Abdus S. and Jiun, Tan P.",
	citeulike-article-id = "4746682",
	issn = "0022-0418",
	journal = "Journal of Documentation",
	keywords = "a, archive, digital\_museum, taxonomy",
	number = "6",
	pages = "751--776",
	posted-at = "2009-06-04 21:47:07",
	priority = "3",
	publisher = "Emerald Group Publishing Limited",
	title = "Enhancing access to digital information resources on heritage: A case of development of a taxonomy at the {Integrated Museum and Archives System} in {Singapore}",
	volume = "61",
	year = "2005",
	file = "{:references:archive.ontology:chaudhry\_etal\_2005.pdf|}"
}

% Chapter in a collected volume; the containing volume is given in booktitle.
% Publisher is inferred from the 10.1007 DOI prefix -- TODO(review): confirm.
@InCollection{ citeulike:4746665,
	abstract = "This study describes the building of ontologies to enhance current digital museum archives. Ontologies are employed to move the service level from information to knowledge retrieval. This study concentrates on a design procedure that exploits Formal Concept Analysis (FCA) to obtain conceptual structures, and Description Logic (DL) to denote concept relations in logic expressions. The empirical findings reveal that development procedures help guide ontology builders to build ontological knowledge bases step by step. Furthermore, the knowledge extraction is helpful and connectable for builders and other tools.",
	author = "Chi, Yu-Liang",
	booktitle = "Advances in Machine Learning and Cybernetics",
	citeulike-article-id = "4746665",
	doi = "10.1007/11739685_31",
	keywords = "archive, ontology",
	pages = "295--304",
	posted-at = "2009-06-04 21:28:54",
	priority = "0",
	publisher = "Springer",
	title = "Constructing Ontologies for Sharing Knowledge in Digital Archives",
	url = "http://dx.doi.org/10.1007/11739685_31",
	year = "2006",
	file = "{:references:archive.ontology:chi\_2006\_lect-notes-computer-sci.pdf|}"
}

% Journal article; the bibsource field records DBLP as the origin of the record.
@Article{ DBLP:ChongMSL03,
	author = {Chong, Quddus and Marwadi, Anup and Supekar, Kaustubh and Lee, Yugyung},
	title = {Ontology Based Metadata Management in Medical Domains},
	journal = {Journal of Research and Practice in Information Technology},
	year = {2003},
	volume = {35},
	number = {2},
	pages = {139--154},
	file = {{:references:archive.ontology:chong\_etal\_2003.pdf|}},
	bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Book. Bibliographic fields come first, then identifiers, then CiteULike bookkeeping fields.
@Book{ citeulike:524985,
	author = {Chowdhury, G. G. and Chowdhury, Sudatta},
	title = {Introduction to Digital Libraries},
	publisher = {Facet Publishing},
	year = {2002},
	month = {November},
	isbn = {1856044653},
	howpublished = {Paperback},
	url = {http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/1856044653},
	keywords = {digital\_library},
	citeulike-article-id = {524985},
	posted-at = {2009-06-30 19:30:45},
	priority = {2}
}

% Journal article. Accented letters in author names use LaTeX accent groups, not HTML entities.
@Article{ citeulike:1645552,
	abstract = "Metadata enables users to find the resources they require, therefore it is an important component of any digital learning object repository. Much work has already been done within the learning technology community to assure metadata quality, focused on the development of metadata standards, specifications and vocabularies and their implementation within repositories. The metadata creation process has thus far been largely overlooked. There has been an assumption that metadata creation will be straightforward and that where machines cannot generate metadata effectively, authors of learning materials will be the most appropriate metadata creators. However, repositories are reporting difficulties in obtaining good quality metadata from their contributors, and it is becoming apparent that the issue of metadata creation warrants attention. This paper surveys the growing body of evidence, including three UK-based case studies, scopes the issues surrounding human-generated metadata creation and identifies questions for further investigation. Collaborative creation of metadata by resource authors and metadata specialists, and the design of tools and processes, are emerging as key areas for deeper research. Research is also needed into how end users will search learning object repositories.",
	author = "Currier, Sarah and Barton, Jane and O'Beirne, R{\'o}n{\'a}n and Ryan, Ben",
	citeulike-article-id = "1645552",
	doi = "10.1080/0968776042000211494",
	journal = "ALT-J",
	keywords = "metadata, metadata\_creation, metadata\_quality",
	number = "1",
	pages = "5--20",
	posted-at = "2009-07-01 21:01:35",
	priority = "0",
	publisher = "Routledge",
	title = "Quality assurance for digital learning object repositories: issues for the metadata creation process",
	url = "http://dx.doi.org/10.1080/0968776042000211494",
	volume = "12",
	year = "2004",
	file = "{:references:metadata.quality:quality-assurance-lom.pdf|}"
}

@InCollection{ citeulike:6255343,
	abstract = "Even though the Dublin Core Metadata Element Set is well accepted as a general solution, it fails to describe more complex information assets and their cross-correlation. These include data from political history, history of arts and sciences, archaeology or observational data from natural history or geosciences. Therefore IFLA and ICOM are merging their core ontologies, an important step towards semantic interoperability of metadata schemata across all archives, libraries and museums. It opens new prospects for advanced global information integration services. The first draft of the combined model was published in June 2006.",
	author = "Martin Doerr and Patrick LeBoeuf",
	booktitle = "Digital Libraries: Research and Development",
	citeulike-article-id = "6255343",
	citeulike-linkout-0 = "http://dx.doi.org/10.1007/978-3-540-77088-6\_11",
	doi = "10.1007/978-3-540-77088-6\_11",
	keywords = "cidoc, frbr",
	pages = "114--123",
	posted-at = "2010-01-16 02:09:02",
	priority = "2",
	publisher = "Springer",
	title = "Modelling Intellectual Processes: The {FRBR} - {CRM} Harmonization",
	url = "http://dx.doi.org/10.1007/978-3-540-77088-6_11",
	year = "2007"
}

@InProceedings{ citeulike:6549002,
	author = {R. Daniel and C. Lagoze and S. D. Payette},
	title = {A metadata architecture for digital libraries},
	booktitle = {Research and Technology Advances in Digital Libraries, 1998. ADL 98. Proceedings. IEEE International Forum on},
	pages = {276--288},
	year = {1998},
	doi = {10.1109/ADL.1998.670428},
	url = {http://dx.doi.org/10.1109/ADL.1998.670428},
	keywords = {active, architecture, dar, data, databases, digital, distributed, downloadable, dynamically, executable, file-import-10-01-16, framework, information, libraries, metadata, networked, relationships, resource, retrieval, structures, systems, warwick},
	abstract = {From an architectural perspective, there is no essential distinction between data and metadata. Both can be represented in distributed active relationships {(DARs),} which are an extension of the Warwick framework {(C.} Lagoze et al., 1996). The {DAR} model is a powerful way to express relationships between networked resources and to allow such relationships to be dynamically downloadable and executable},
	citeulike-article-id = {6549002},
	citeulike-linkout-0 = {http://dx.doi.org/10.1109/ADL.1998.670428},
	posted-at = {2010-01-16 01:54:34},
	priority = {2}
}

@Article{ citeulike:6548833,
	author = {Rea Devakos},
	title = {Towards user responsive institutional repositories: a case study},
	journal = {Library Hi Tech},
	volume = {24},
	number = {2},
	pages = {173--182},
	year = {2006},
	doi = {10.1108/07378830610669556},
	url = {http://dx.doi.org/10.1108/07378830610669556},
	keywords = {academic, digital, file-import-10-01-16, libraries, qualitative, research},
	abstract = {Purpose -- To report on the University of Toronto's implementation of an institutional repository. Design/methodology/approach -- Describe decision making process. A range of qualitative research methods were used to solicit early adopter and library concerns. Findings were then used to guide implementation. Findings -- Provides the rational behind decisions made. Argues that modified qualitative research methods may be useful to new library projects. Research limitations/implications -- The report is specific to an institution. Practical implications -- Prioritizing actions, focusing on faculty and leveraging resources, notably student assistants is key. Originality/value -- This paper provides practical information and a model which may be useful for others implementing repository services or other emerging technologies.},
	citeulike-article-id = {6548833},
	citeulike-linkout-0 = {http://dx.doi.org/10.1108/07378830610669556},
	citeulike-linkout-1 = {http://www.emeraldinsight.com/10.1108/07378830610669556},
	posted-at = {2010-01-16 01:34:41},
	priority = {2}
}

@Article{ citeulike:4217790,
	abstract = "Long-term digital preservation, the process of maintaining digital objects through time to ensure continued access, has become a crucial issue in recent years. Whilst the amount of digitised information is constantly increasing, so too is the pace of progress in information technology, resulting in obsolescence of the software and hardware required to access and view digital information. Despite many organisations recognising this threat and the resulting need for preservation action, more work is required to effectively address the issue. We present in this article a framework for the long-term digital preservation of 3-D data. This framework is based on two pertinent preservation practices, emulation and metadata which ensure that the authenticity and usability, respectively, of a preserved digital object remain intact through time. An evaluation of our framework is presented which illustrates the viability of our approach in retaining accessibility, authenticity and usability for future end users.",
	author = "Julie Doyle and Herna Viktor and Eric Paquet",
	citeulike-article-id = "4217790",
	citeulike-linkout-0 = "http://dx.doi.org/10.1007/s00799-009-0051-7",
	citeulike-linkout-1 = "http://www.springerlink.com/content/r624114872486937",
	doi = "10.1007/s00799-009-0051-7",
	journal = "International Journal on Digital Libraries",
	keywords = "metadata, preservation",
	number = "1",
	pages = "33--47",
	posted-at = "2010-01-15 17:29:13",
	priority = "2",
	title = "Long-term digital preservation: preserving authenticity and usability of {3-D} data",
	url = "http://dx.doi.org/10.1007/s00799-009-0051-7",
	volume = "10",
	year = "2009"
}

@Article{ citeulike:4087393,
	abstract = "This article presents the methodology that has been successfully used over the past seven years by an interdisciplinary team to create the International Committee for Documentation of the International Council of Museums (CIDOC) CONCEPTUAL REFERENCE MODEL (CRM), a high-level ontology to enable information integration for cultural heritage data and their correlation with library and archive information. The CIDOC CRM is now in the process to become an International Organization for Standardization (ISO) standard. This article justifies in detail the methodology and design by functional requirements and gives examples of its contents. The CIDOC CRM analyzes the common conceptualizations behind data and metadata structures to support data transformation, mediation, and merging. It is argued that such ontologies are property-centric, in contrast to terminological systems, and should be built with different methodologies. It is demonstrated that ontological and epistemological arguments are equally important for an effective design, in particular when dealing with knowledge from the past in any domain. It is assumed that the presented methodology and the upper level of the ontology are applicable in a far wider domain.",
	address = "Menlo Park, CA, USA",
	author = "Martin Doerr",
	citeulike-article-id = "4087393",
	issn = "0738-4602",
	journal = "AI Magazine",
	keywords = "cidoc, cidoc\_crm",
	number = "3",
	pages = "75--92",
	posted-at = "2009-06-30 16:10:21",
	priority = "2",
	publisher = "American Association for Artificial Intelligence",
	title = "The {CIDOC} conceptual reference module: an ontological approach to semantic interoperability of metadata",
	url = "http://portal.acm.org/citation.cfm?id=958678",
	volume = "24",
	year = "2003",
	file = "{:references:archive.ontology:doerr.2003.aimag.cidoc\_crm.pdf|}"
}

@Article{ citeulike:4149301,
	author = {Martin Doerr and Jane Hunter and Carl Lagoze},
	title = {Towards a core ontology for information integration},
	journal = {Journal of Digital Information},
	volume = {4},
	number = {1},
	year = {2003},
	url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.6.409},
	keywords = {cidoc\_crm, ontology},
	abstract = {In this paper, we argue that a core ontology is one of the key building blocks necessary to enable the scalable assimilation of information from diverse sources. A complete and extensible ontology that expresses the basic concepts that are common across a variety of domains and can provide the basis for specialization into domain-specific concepts and vocabularies, is essential for well-defined mappings between domain-specific knowledge representations (i.e., metadata vocabularies) and the subsequent building of a variety of services such as cross-domain searching, browsing, data mining and knowledge extraction. This paper describes the results of a series of three workshops held in 2001 and 2002 which brought together representatives from the cultural heritage and digital library communities with the goal of harmonizing their knowledge perspectives and producing a core ontology. The knowledge perspectives of these two communities were represented by the CIDOC/CRM [31], an ontology for information exchange in the cultural heritage and museum community, and the ABC ontology [33], a model for the exchange and integration of digital library information. This paper describes the mediation process between these two different knowledge biases and the results of this mediation -- the harmonization of the ABC and CIDOC/CRM ontologies, which we believe may provide a useful basis for information integration in the wider scope of the involved communities.},
	citeulike-article-id = {4149301},
	citeulike-linkout-0 = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.6.409},
	citeulike-linkout-1 = {http://journals.tdl.org/jodi/article/download/92/91},
	posted-at = {2009-06-30 16:12:43},
	priority = {2}
}

@Article{ citeulike:1003673,
	author = {Stephen Downes},
	title = {Resource Profiles},
	journal = {Journal of Interactive Media in Education},
	number = {5},
	year = {2004},
	url = {http://www-jime.open.ac.uk/2004/5/downes-2004-5-disc-t.html},
	keywords = {metadata},
	citeulike-article-id = {1003673},
	citeulike-linkout-0 = {http://www-jime.open.ac.uk/2004/5/downes-2004-5-disc-t.html},
	posted-at = {2009-12-02 00:29:53},
	priority = {0},
	file = {{:references:metadata:downes.2004.resource.profiles.pdf|}}
}

@Misc{ unpublished:elenatorou,
	author = "Elena Torou and Akrivi Katifori and Costas Vassilakis",
	keywords = "digital\_museum, information\_visualization, ontology",
	posted-at = "2009-06-04 21:08:17",
	priority = "3",
	title = "Creating an Historical Archive Ontology: Guidelines and Evaluation",
	url = "http://oceanis.mm.di.uoa.gr/pened/papers/7-onto-meth.pdf",
	year = "2006",
	file = "{:references:archive.ontology:torou\_etal\_2006.pdf|}"
}

@Article{ citeulike:4052921,
	author = {Adonna Fleming and Margaret Mering and Judith A. Wolfe},
	title = {Library Personnel's Role in the Creation of Metadata: A Survey of Academic Libraries},
	journal = {Technical Services Quarterly},
	volume = {25},
	number = {4},
	pages = {1--15},
	year = {2008},
	publisher = {Routledge},
	doi = {10.1080/07317130802127983},
	url = {http://dx.doi.org/10.1080/07317130802127983},
	keywords = {metadata, metadata\_creation},
	abstract = {The digital age has caused the paradigm to shift in academic libraries both in terms of their collections and the roles of their personnel. As academic libraries begin to digitize objects in their collections, how and who in the library creates access to these resources has become a hot issue. At the University of Nebraska-Lincoln Libraries (UNL Libraries), taskforces were formed to study metadata schemes used at UNL Libraries. The taskforces identified the various metadata schemes in use and the role of various departments within UNL Libraries in the creation of metadata. They made recommendations about how to document decisions relating to metadata and how to coordinate metadata creation and digitization projects. As a result, the authors decided to survey American Research Libraries (ARL) and other peer libraries to determine their metadata workflow. This paper discusses the results of the survey and provides insight as to how libraries may meet the challenge of creating metadata through the reorganization of departments and staffing responsibilities.},
	citeulike-article-id = {4052921},
	posted-at = {2009-07-01 20:54:57},
	priority = {2}
}

@Article{ citeulike:1658742,
	address = "Tarrytown, NY, USA",
	author = "Muriel Foulonneau",
	citeulike-article-id = "1658742",
	doi = "10.1016/j.ipm.2006.06.004",
	issn = "0306-4573",
	journal = "Information Processing \& Management",
	keywords = "metadata, redundancy",
	month = "May",
	number = "3",
	pages = "740--751",
	posted-at = "2009-06-30 06:04:55",
	priority = "2",
	publisher = "Pergamon Press, Inc.",
	title = "Information redundancy across metadata collections",
	url = "http://dx.doi.org/10.1016/j.ipm.2006.06.004",
	volume = "43",
	year = "2007",
	file = "{:references:metadata.use:information.redundancy.metadata.pdf|}"
}

@Book{ citeulike:622433,
	abstract = {As the World Wide Web continues to expand, it becomes increasingly difficult for users to obtain information efficiently. Because most search engines read format languages such as HTML or SGML, search results reflect formatting tags more than actual page content, which is expressed in natural language. \emph{Spinning the Semantic Web} describes an exciting new type of hierarchy and standardization that will replace the current "web of links" with a "web of meaning." Using a flexible set of languages and tools, the Semantic Web will make all available information -- display elements, metadata, services, images, and especially content -- accessible. The result will be an immense repository of information accessible for a wide range of new applications. This first handbook for the Semantic Web covers, among other topics, software agents that can negotiate and collect information, markup languages that can tag many more types of information in a document, and knowledge systems that enable machines to read Web pages and determine their reliability. The truly interdisciplinary Semantic Web combines aspects of artificial intelligence, markup languages, natural language processing, information retrieval, knowledge representation, intelligent agents, and databases.},
	editor = "Dieter Fensel and James A. Hendler and Henry Lieberman and Wolfgang Wahlster",
	citeulike-article-id = "622433",
	howpublished = "Paperback",
	isbn = "026256212X",
	keywords = "semantic\_web",
	month = "March",
	posted-at = "2009-06-30 20:24:32",
	priority = "2",
	publisher = "The MIT Press",
	title = "Spinning the Semantic Web: Bringing the World Wide Web to Its Full Potential",
	url = "http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/026256212X",
	year = "2005"
}

@Article{ 984325,
	author = "Marcos Andr{\'e} Gon{\c{c}}alves and Edward A. Fox and Layne T. Watson and Neill A. Kipp",
	title = "Streams, structures, spaces, scenarios, societies {(5S)}: A formal model for digital libraries",
	journal = "ACM Transactions on Information Systems",
	volume = "22",
	number = "2",
	year = "2004",
	issn = "1046-8188",
	pages = "270--312",
	doi = "10.1145/984321.984325",
	publisher = "ACM",
	address = "New York, NY, USA"
}

@Article{ citeulike:2793160,
	abstract = "Digital libraries (DLs) have eluded definitional consensus and lack agreement on common theories and frameworks. This makes comparison of DLs extremely difficult, promotes ad-hoc development, and impedes interoperability. In this paper we propose a formal ontology for DLs that defines the fundamental concepts, relationships, and axiomatic rules that govern the DL domain, therefore providing a frame of reference for the discussion of essential concepts of DL design and construction. The ontology is an axiomatic, formal treatment of DLs, which distinguishes it from other approaches that informally define a number of architectural variants. The process of construction of the ontology was guided by 5S, a formal framework for digital libraries. To test its expressibility we have used the ontology to create a taxonomy of DL services and to reason about issues of reusability, extensibility, and composability. Some practical applications of the ontology are also described including: the definition of a digital library services taxonomy, the proposal of a modeling language for digital libraries, and the specification of quality metrics to evaluate digital libraries. We also demonstrate how to use the ontology to formally describe DL architectures and to prove some properties about them, thus helping to further validate the ontology.",
	author = "Marcos Gon{\c{c}}alves and Edward Fox and Layne Watson",
	citeulike-article-id = "2793160",
	citeulike-linkout-0 = "http://dx.doi.org/10.1007/s00799-008-0033-1",
	citeulike-linkout-1 = "http://www.ingentaconnect.com/content/klu/799/2008/00000008/00000002/00000033",
	citeulike-linkout-2 = "http://www.springerlink.com/content/f74353ph47n55234",
	day = "1",
	doi = "10.1007/s00799-008-0033-1",
	issn = "1432-5012",
	journal = "International Journal on Digital Libraries",
	month = "April",
	number = "2",
	pages = "91--114",
	posted-at = "2010-01-15 20:20:39",
	priority = "2",
	publisher = "Springer",
	title = "Towards a digital library theory: a formal digital library ontology",
	url = "http://dx.doi.org/10.1007/s00799-008-0033-1",
	volume = "8",
	year = "2008"
}

@InProceedings{ citeulike:6544669,
	address = "Tokyo, Japan",
	author = "Jane Greenberg and Maria C. Pattuelli and Bijan Parsia and W. Davenport Robertson",
	booktitle = "DCMI '01: Proceedings of the International Conference on Dublin Core and Metadata Applications 2001",
	citeulike-article-id = "6544669",
	keywords = "metadata",
	pages = "38--46",
	posted-at = "2010-01-15 17:22:41",
	priority = "2",
	publisher = "National Institute of Informatics",
	title = "Author-generated {Dublin Core} Metadata for Web Resources: A Baseline Study in an Organization",
	year = "2001"
}

@Book{ citeulike:587164,
	abstract = "{Ontologies provide a common vocabulary of an area and define, with different levels of formality, the meaning of the terms and the relationships between them. Ontological engineering refers to the set of activities concerning the ontology development process, the ontology life cycle, the methods and methodologies for building ontologies, and the tool suites and languages that support them. During the last decade, increasing attention has been focused on ontologies. Ontologies are now widely used in knowledge engineering, artificial intelligence and computer science; in applications related to areas such as knowledge management, natural language processing, e-commerce, intelligent information integration, bio-informatics, education; and in new emerging fields like the semantic web. The book presents the major issues of ontological engineering and describes the most outstanding ontologies currently available. It covers the practical aspects of selecting and applying methodologies, languages, and tools for building ontologies. Ontological Engineering will be of great value to students and researchers, and to developers who want to integrate ontologies in their information systems. }",
	author = "Asunci{\'o}n G{\'o}mez-P{\'e}rez and Oscar Corcho and Mariano Fern{\'a}ndez-L{\'o}pez",
	citeulike-article-id = "587164",
	edition = "First",
	howpublished = "Hardcover",
	isbn = "1852335513",
	keywords = "ontology, semantic\_web",
	month = "July",
	posted-at = "2009-06-30 19:52:40",
	priority = "0",
	publisher = "Springer",
	series = "Advanced Information and Knowledge Processing",
	title = "Ontological Engineering: with examples from the areas of Knowledge Management, e-Commerce and the Semantic Web",
	url = "http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/1852335513",
	year = "2004"
}

@Article{ citeulike:4783697,
	abstract = "Contribution to a special issue on image access. Reports on a quantitative categorical analysis of metadata elements in the Dublin Core, VRA Core, REACH and EAD metadata schemas, all of which can be used for organizing and describing images. Found that each of the examined metadata schemas contains elements that support the discovery, use, authentication and administration of images, and that the number and proportion of elements supporting functions in these classes varies per schema. Introduces a new schema comparison methodology and explores the development of a class oriented functional metadata schema for controlling images across multiple domains. (Original abstract - amended)",
	author = "Jane Greenberg",
	citeulike-article-id = "4783697",
	citeulike-linkout-0 = "http://dx.doi.org/10.1002/asi.1170",
	doi = "10.1002/asi.1170",
	journal = "Journal of the American Society for Information Science and Technology",
	keywords = "image, metadata, metadata\_standards",
	number = "11",
	pages = "917--924",
	posted-at = "2009-09-25 21:25:13",
	priority = "0",
	title = "A quantitative categorical analysis of metadata elements in image-applicable metadata schemas",
	url = "http://dx.doi.org/10.1002/asi.1170",
	volume = "52",
	year = "2001",
	file = "{:references:metadata.use:greenberg.2001.quantitative.metadata.element.pdf|}"
}

@Article{ citeulike:4052132,
	author = {Jane Greenberg},
	title = {Metadata Extraction and Harvesting},
	journal = {Journal of Library Metadata},
	volume = {6},
	number = {4},
	pages = {59--82},
	year = {2004},
	publisher = {Routledge},
	doi = {10.1300/J141v06n04\_05},
	url = {http://dx.doi.org/10.1300/J141v06n04\_05},
	keywords = {extraction, harvest, metadata},
	abstract = {This research explores the capabilities of two Dublin Core automatic metadata generation applications, Klarity and DC-dot. The top level Web page for each resource, from a sample of 29 resources obtained from National Institute of Environmental Health Sciences (NIEHS), was submitted to both generators. Results indicate that extraction processing algorithms can contribute to useful automatic metadata generation. Results also indicate that harvesting metadata from META tags created by humans can have a positive impact on automatic metadata generation. The study identifies several ways in which automatic metadata generation applications can be improved and highlights several important areas of research. The conclusion is that integrating extraction of harvesting methods will be the best approach to creating optimal metadata, and more research is needed to identify when to apply which method.},
	citeulike-article-id = {4052132},
	posted-at = {2009-06-05 14:37:11},
	priority = {2},
	file = {{:references:001:greenberg04metadata.pdf|}}
}

@Article{ citeulike:4783742,
	author = {Jane Greenberg and Kristina Spurgin and Abe Crystal},
	title = {Functionalities for automatic metadata generation applications: a survey of metadata experts' opinions},
	journal = {International Journal of Metadata, Semantics and Ontologies},
	volume = {1},
	number = {1},
	pages = {3--20},
	year = {2006},
	abstract = {This paper reports on the automatic metadata generation applications (AMeGA) project's metadata expert survey. Automatic metadata generation research is reviewed and the study's methods, key findings and conclusions are presented. Participants anticipate greater accuracy with automatic techniques for technical metadata (e.g., ID, language, and format metadata) compared to metadata requiring intellectual discretion (e.g., subject and description metadata). Support for implementing automatic techniques paralleled anticipated accuracy results. Metadata experts are in favour of using automatic techniques, although they are generally not in favour of eliminating human evaluation or production for the more intellectually demanding metadata. Results are incorporated into Version 1.0 of the Recommended Functionalities for automatic metadata generation applications (Appendix A).},
	citeulike-article-id = {4783742},
	posted-at = {2009-06-30 06:28:48},
	priority = {2},
	file = {{:references:metadata.autogen:autogen01.pdf|}}
}

@Book{ citeulike:1940300,
	abstract = "{Interested in how an efficient search engine works? Want to know what algorithms are used to rank resulting documents in response to user requests? The authors answer these and other key information retrieval design and implementation questions. This book is not yet another high level text. Instead, algorithms are thoroughly described, making this book ideally suited for both computer science students and practitioners who work on search-related applications. As stated in the foreword, this book provides a current, broad, and detailed overview of the field and is the only one that does so. Examples are used throughout to illustrate the algorithms. The authors explain how a query is ranked against a document collection using either a single or a combination of retrieval strategies, and how an assortment of utilities are integrated into the query processing scheme to improve these rankings. Methods for building and compressing text indexes, querying and retrieving documents in multiple languages, and using parallel or distributed processing to expedite the search are likewise described. This edition is a major expansion of the one published in 1998. Besides updating the entire book with current techniques, it includes new sections on language models, cross-language information retrieval, peer-to-peer processing, XML search, mediators, and duplicate document detection.}",
	author = "David A. Grossman and Ophir Frieder",
	citeulike-article-id = "1940300",
	edition = "Second",
	howpublished = "Paperback",
	isbn = "1402030045",
	keywords = "information\_retrieval",
	month = "December",
	posted-at = "2009-06-30 20:19:32",
	priority = "2",
	publisher = "Springer",
	series = "The Information Retrieval Series",
	title = "Information Retrieval: Algorithms and Heuristics",
	url = "http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/1402030045",
	year = "2004"
}

@Misc{ citeulike:3861904,
	author = "Tony Gill",
	citeulike-article-id = "3861904",
	citeulike-linkout-0 = "http://www.firstmonday.org/issues/issue9\_5/gill/",
	howpublished = "http://www.firstmonday.org/issues/issue9\_5/gill/",
	keywords = "cidoc",
	month = "May",
	posted-at = "2010-01-15 02:52:17",
	priority = "0",
	title = "Building semantic bridges between museums, libraries and archives",
	url = "http://www.firstmonday.org/issues/issue9_5/gill/",
	year = "2004",
	file = "{:references:archive.ontology:gill.2004.first.monday.build\_semantic\_bridges\_cidoc\_crm.pdf|}"
}

@Article{ citeulike:1725563,
	abstract = "In this article, we elaborate on the meaning of quality in digital libraries (DLs) by proposing a model that is deeply grounded in a formal framework for digital libraries: 5S (Streams, Structures, Spaces, Scenarios, and Societies). For each major DL concept in the framework we formally define a number of dimensions of quality and propose a set of numerical indicators for those quality dimensions. In particular, we consider key concepts of a minimal DL: catalog, collection, digital object, metadata specification, repository, and services. Regarding quality dimensions, we consider: accessibility, accuracy, completeness, composability, conformance, consistency, effectiveness, efficiency, extensibility, pertinence, preservability, relevance, reliability, reusability, significance, similarity, and timeliness. Regarding measurement, we consider characteristics like: response time (with regard to efficiency), cost of migration (with respect to preservability), and number of service failures (to assess reliability). For some key DL concepts, the (quality dimension, numerical indicator) pairs are illustrated through their application to a number of ``real-world'' digital libraries. We also discuss connections between the proposed dimensions of DL quality and an expanded version of a workshop's consensus view of the life cycle of information in digital libraries. Such connections can be used to determine when and where quality issues can be measured, assessed, and improved -- as well as how possible quality problems can be prevented, detected, and eliminated.",
	address = "Tarrytown, NY, USA",
	author = "M. Gon{\c{c}}alves and B. Moreira and E. Fox and L. Watson",
	citeulike-article-id = "1725563",
	citeulike-linkout-0 = "http://portal.acm.org/citation.cfm?id=1241319",
	citeulike-linkout-1 = "http://dx.doi.org/10.1016/j.ipm.2006.11.010",
	citeulike-linkout-2 = "http://linkinghub.elsevier.com/retrieve/pii/S030645730600197X",
	doi = "10.1016/j.ipm.2006.11.010",
	issn = "0306-4573",
	journal = "Information Processing \& Management",
	keywords = "assessment, digital\_library",
	month = "September",
	number = "5",
	pages = "1416--1437",
	posted-at = "2010-01-15 14:01:21",
	priority = "2",
	publisher = "Pergamon Press, Inc.",
	title = "``What is a good digital library?'' -- A quality model for digital libraries",
	url = "http://dx.doi.org/10.1016/j.ipm.2006.11.010",
	volume = "43",
	year = "2007"
}

@Misc{ citeulike:6994468,
	abstract = "In this article, an approach to personal information management is described that is based on Semantic",
	author = {William I. Grosky and Farshad Fotouhi and Bodo H{\"u}semann},
	citeulike-article-id = "6994468",
	citeulike-linkout-0 = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.113.9128",
	keywords = "ontology, ontology\_construction, ontomedia",
	posted-at = "2010-04-10 03:50:27",
	priority = "2",
	title = "{OntoMedia}---Semantic Multimedia Metadata Integration and Organization",
	url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.113.9128"
}

@TechReport{ citeulike:1556975,
	abstract = "This is a specification of a precise semantics, and corresponding complete systems of inference rules, for the Resource Description Framework (RDF) and RDF Schema (RDFS).",
	author = "Patrick Hayes",
	citeulike-article-id = "1556975",
	citeulike-linkout-0 = "http://www.w3.org/TR/rdf-mt/",
	day = "10",
	editor = "Patrick Hayes",
	howpublished = "http://www.w3.org/TR/rdf-mt/",
	institution = "W3C",
	keywords = "rdf, semantic",
	month = "February",
	posted-at = "2010-01-15 19:43:02",
	priority = "2",
	publisher = "World Wide Web Consortium",
	series = "W3C Recommendation",
	title = "{RDF} Semantics",
	type = "{W3C} Recommendation",
	url = "http://www.w3.org/TR/rdf-mt/",
	year = "2004"
}

@Article{ citeulike:6545091,
	abstract = "There has been notably little convergence between information organization and information use studies. A framework for explicating the contextual interplay of information interactions and infrastructures of information, and more specifically the interface of information work and knowledge organization systems, is proposed. The theoretical foundations of the framework are based on systems theory and ecological approach. It is suggested that the interplay of information use and information infrastructures may be conceptualized as a systemic interaction, which is driven by the simultaneous influence of human activity related warrants and infrastructural affordances and constraints. The model provides an instrument that explicates the interplay of human information use and information infrastructures.",
	author = "Isto Huvila",
	citeulike-article-id = "6545091",
	citeulike-linkout-0 = "http://dx.doi.org/10.1177/0165551509336705",
	citeulike-linkout-1 = "http://jis.sagepub.com/cgi/content/abstract/35/6/695",
	day = "1",
	doi = "10.1177/0165551509336705",
	journal = "Journal of Information Science",
	month = "December",
	number = "6",
	pages = "695--708",
	posted-at = "2010-01-15 19:34:11",
	priority = "2",
	title = "Ecological framework of information interactions and information infrastructures",
	url = "http://dx.doi.org/10.1177/0165551509336705",
	volume = "35",
	year = "2009"
}

@InProceedings{ citeulike:4109359,
	abstract = {The CORES metadata schemas registry is designed to enable users to discover and navigate metadata element sets. The paper reflects on some of the experiences of implementing the registry, and examines some of the issues of promoting such services in the context of a "partially Semantic Web" where metadata applications are evolving and many have not yet adopted the RDF model.},
	author = {Rachel Heery and Pete Johnston and Csaba F{\"u}l{\"o}p and Andr{\'a}s Micsik},
	booktitle = "DCMI '03: Proceedings of the 2003 international conference on Dublin Core and metadata applications",
	citeulike-article-id = "4109359",
	isbn = "0974530301",
	keywords = "cores, metadata, registry, semantic, web",
	location = "Seattle, Washington",
	pages = "1--8",
	posted-at = "2009-06-05 14:39:45",
	priority = "2",
	publisher = "Dublin Core Metadata Initiative",
	title = "Metadata schema registries in the partially {Semantic Web}: the {CORES} experience",
	url = "http://portal.acm.org/citation.cfm?id=1383296.1383299",
	year = "2003"
}

@Article{ citeulike:2191587,
	address = "Piscataway, NJ, USA",
	author = "James Hendler",
	citeulike-article-id = "2191587",
	doi = "10.1109/5254.920597",
	issn = "1541-1672",
	journal = "IEEE Intelligent Systems",
	keywords = "agent, semantic\_web",
	month = "March",
	number = "2",
	pages = "30--37",
	posted-at = "2009-06-30 15:41:58",
	priority = "2",
	publisher = "IEEE Educational Activities Department",
	title = "Agents and the {Semantic Web}",
	url = "http://dx.doi.org/10.1109/5254.920597",
	volume = "16",
	year = "2001",
	file = "{:references:semantic.web:hendler.2001.ieee.is.agents.semantic.web.pdf|}"
}

@Article{ citeulike:4200246,
	abstract = "The conversation about metadata quality has developed slowly in libraries, hindered by unexamined assumptions about metadata carrying over from experience in the MARC environment. In the wider world, discussions about functionality must drive discussions about how quality might be determined and ensured. Because the quality-enforcing structures present in the MARC world---mature standards, common documentation, and bibliographic utilities---are lacking in the metadata world, metadata practitioners desiring to improve the quality of metadata used in their libraries must develop and proliferate their own processes of evaluation and transformation to support essential interoperability. In this article, the author endeavors to describe how those processes might be established and sustained to support metadata quality improvement.",
	author = "Diane I. Hillmann",
	citeulike-article-id = "4200246",
	doi = "10.1080/01639370802183008",
	journal = "Cataloging \& Classification Quarterly",
	keywords = "metadata, quality",
	number = "1",
	pages = "65--80",
	posted-at = "2009-06-05 14:34:42",
	priority = "0",
	publisher = "Routledge",
	title = "Metadata Quality: From Evaluation to Augmentation",
	url = "http://dx.doi.org/10.1080/01639370802183008",
	volume = "46",
	year = "2008"
}

@Book{ citeulike:687659,
	author = "Diane I. Hillmann and Elaine L. Westbrooks",
	citeulike-article-id = "687659",
	howpublished = "Paperback",
	isbn = "0838908829",
	keywords = "metadata",
	posted-at = "2009-06-30 20:29:52",
	priority = "2",
	year = "2004",
	publisher = "American Library Association",
	title = "Metadata in Practice",
	url = "http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/0838908829"
}

% NOTE(review): typed as InProceedings because the record has a page range inside a containing volume; confirm the volume's publisher/series before adding them.
@InProceedings{ citeulike:1217375,
	author = "Baden Hughes",
	booktitle = "Digital Libraries: International Collaboration and Cross-Fertilization",
	citeulike-article-id = "1217375",
	keywords = "evaluation, metadata, metadata\_quality",
	pages = "320--329",
	posted-at = "2009-07-01 20:59:54",
	priority = "3",
	title = "Metadata Quality Evaluation: Experience from the {Open Language Archives Community}",
	url = "http://www.springerlink.com/content/4kaxeu5p2fb2nac1",
	year = "2004"
}

@Book{ citeulike:209816,
	author = "Chris Hart",
	citeulike-article-id = "209816",
	citeulike-linkout-0 = "http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20\&path=ASIN/0761959750",
	citeulike-linkout-1 = "http://www.amazon.de/exec/obidos/redirect?tag=citeulike01-21\&path=ASIN/0761959750",
	citeulike-linkout-2 = "http://www.amazon.fr/exec/obidos/redirect?tag=citeulike06-21\&path=ASIN/0761959750",
	citeulike-linkout-3 = "http://www.amazon.jp/exec/obidos/ASIN/0761959750",
	citeulike-linkout-4 = "http://www.amazon.co.uk/exec/obidos/ASIN/0761959750/citeulike00-21",
	citeulike-linkout-5 = "http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20\&path=ASIN/0761959750",
	citeulike-linkout-6 = "http://www.worldcat.org/isbn/0761959750",
	citeulike-linkout-7 = "http://books.google.com/books?vid=ISBN0761959750",
	citeulike-linkout-8 = "http://www.amazon.com/gp/search?keywords=0761959750\&index=books\&linkCode=qs",
	citeulike-linkout-9 = "http://www.librarything.com/isbn/0761959750",
	day = "01",
	howpublished = "Paperback",
	isbn = "0761959750",
	keywords = "literature\_review",
	month = "March",
	posted-at = "2009-12-22 14:24:34",
	priority = "0",
	publisher = "SAGE Publications",
	title = "Doing a Literature Review: Releasing the Social Science Research Imagination",
	url = "http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/0761959750",
	year = "1999",
	pages = "230"
}

@Article{ citeulike:1019370,
	abstract = "This article presents the semantic portal MuseumFinland for publishing heterogeneous museum collections on the Semantic Web. It is shown how museums with their semantically rich and interrelated collection content can create a large, consolidated semantic collection portal together on the web. By sharing a set of ontologies, it is possible to make collections semantically interoperable, and provide the museum visitors with intelligent content-based search and browsing services to the global collection base. The architecture underlying MuseumFinland separates generic search and browsing services from the underlying application dependent schemas and metadata by a layer of logical rules. As a result, the portal creation framework and software developed has been applied successfully to other domains as well. MuseumFinland got the Semantic Web Challenge Award (second prize) in 2004.",
	author = "E. Hyv{\"o}nen and E. M{\"a}kel{\"a} and M. Salminen and A. Valo and K. Viljanen and S. Saarela and M. Junnila and S. Kettula",
	booktitle = "Selected Papers from the International Semantic Web Conference, 2004 - ISWC, 2004",
	citeulike-article-id = "1019370",
	citeulike-linkout-0 = "http://dx.doi.org/10.1016/j.websem.2005.05.008",
	citeulike-linkout-1 = "http://linkinghub.elsevier.com/retrieve/pii/S157082680500017X",
	citeulike-linkout-2 = "http://www.sciencedirect.com/science/article/B758F-4GXVGBB-1/2/31b4fa1159b5bb40335853c6c25ecd50",
	doi = "10.1016/j.websem.2005.05.008",
	issn = "15708268",
	journal = "Web Semantics: Science, Services and Agents on the World Wide Web",
	keywords = "museum, ontology, semantic\_web",
	month = "October",
	number = "2--3",
	pages = "224--241",
	posted-at = "2010-01-15 02:37:08",
	priority = "0",
	title = "{MuseumFinland}---{Finnish} museums on the {Semantic Web}",
	url = "http://dx.doi.org/10.1016/j.websem.2005.05.008",
	volume = "3",
	year = "2005",
	file = "{:references:ontology:hyvonen.2005.jows.museum\_finland\_semantic\_web.pdf|}"
}

% NOTE(review): no journal is recorded for this item (only a CiteSeerX record), so it is typed as Misc.
% The author list keeps the three distinct people found in the scraped export; confirm names and venue.
@Misc{ citeulike6580903,
	abstract = "In this paper, we want to exploit this similarity to apply reasoning techniques from description logics to semi-structured data.",
	author = "David Michaeli and Werner Nutt and Y. Sagiv",
	citeulike-article-id = "6580903",
	citeulike-linkout-0 = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.36.2855",
	keywords = "description\_logics, logic, semi-structure",
	posted-at = "2010-01-22 20:55:44",
	priority = "2",
	title = "Classification Rules for Semi-Structured Data",
	url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.36.2855",
	year = "1997"
}

@Book{ citeulike:3816245,
	address = "Westport, CT",
	author = "Sheila S. Intner and Susan I. Lazinger and Jean Weihs",
	citeulike-article-id = "3816245",
	citeulike-linkout-0 = "http://www.webology.ir/2006/v3n3/bookreview5.html",
	isbn = "1591581451",
	keywords = "metadata",
	posted-at = "2009-12-20 14:41:59",
	priority = "0",
	publisher = "Libraries Unlimited",
	title = "Metadata and Its Impact on Libraries",
	url = "http://www.webology.ir/2006/v3n3/bookreview5.html",
	year = "2006"
}

@Article{ citeulike:6567098,
	abstract = "Metadata can be used to precisely represent data semantics. It can also serve to improve data sharing and exchange. Because the various types of metadata are created in different ways, they can suffer from a problem of inconsistency. Recently, metadata gateway methods have been researched to solve this problem. However, the performance of the existing approaches based on metadata schemas is poor and their maintenance (adaptation of metadata changes) is time consuming. In this paper, a novel message conversion system is proposed, which functions by separating the heterogeneous mapping information from the mapping rules of the metadata, in order to overcome the drawbacks of the existing metadata gateway methods. The proposed system controls the standardized data elements dynamically based on the Metadata Registry (MDR), which is one of the most important elements of the ISO/IEC 11179 standard. The problems associated with adding supplementary metadata are resolved, since the standard provides for incorporating additional data elements created in the future. MSDL is defined as a protocol which can be used for exchanging messages between heterogeneous systems, and which ensures that all of the systems have their own independent metadata schemas.",
	author = "Dongwon Jeong and Peter H. In and Fran Jarnjak and Young-Gab Kim and Doo-Kwon Baik",
	citeulike-article-id = "6567098",
	citeulike-linkout-0 = "http://dx.doi.org/10.1177/0165551505055403",
	citeulike-linkout-1 = "http://jis.sagepub.com/cgi/content/abstract/31/5/394",
	day = "1",
	doi = "10.1177/0165551505055403",
	journal = "Journal of Information Science",
	keywords = "metadata\_repository, metadata\_semantics",
	month = "October",
	number = "5",
	pages = "394--406",
	posted-at = "2010-01-20 15:21:39",
	priority = "2",
	title = "A message conversion system, {XML}-based metadata semantics description language and metadata repository",
	url = "http://dx.doi.org/10.1177/0165551505055403",
	volume = "31",
	year = "2005"
}

@article{ citeulike:6551053,
	author = {Jane Greenberg and Hollie C. White and Sarah Carrier and Ryan Scherle},
	title = {A Metadata Best Practice for a Scientific Data Repository},
	journal = {Journal of Library Metadata},
	volume = {9},
	number = {3},
	pages = {194--212},
	year = {2009},
	publisher = {Routledge},
	doi = {10.1080/19386380903405090},
	url = {http://dx.doi.org/10.1080/19386380903405090},
	keywords = {metadata, repository},
	abstract = {Digital data repositories ought to support immediate operational needs and long-term project goals. This paper presents the Dryad repository's metadata best practice balancing of these two needs. The paper reviews background work exploring the meaning of science, characterizing data, and highlighting data curation metadata challenges. The Dryad repository is introduced, and the initiative's metadata best practice and underlying rationales are described. Dryad's metadata approach includes two prongs: one addressing the long-term goal to align with the Semantic Web via a metadata application profile; and another addressing the immediate need to make content available in DSpace via an extensible markup language (XML) schema. The conclusion summarizes limitations and advantages of the two prongs underlying Dryad's metadata effort.},
	citeulike-article-id = {6551053},
	citeulike-linkout-0 = {http://dx.doi.org/10.1080/19386380903405090},
	posted-at = {2010-01-16 17:02:49},
	priority = {2}
}

@Article{ citeulike:6548921,
	abstract = "Purpose -- The purpose of this paper is to discuss issues associated with open access {(OA)} to electronic theses and dissertations {(ETDs)} and to describe the University of Waterloo E-thesis Project and its partnerships with Theses Canada and the Networked Digital Library of Theses and Dissertations. Design/methodology/approach -- {UW} E-thesis Project decisions on issues associated with electronic submission and {OA} are presented. Partnerships with Theses Canada and the Networked Digital Library of Theses and Dissertations are described and the goals and activities of these organizations are outlined. Findings -- Author-created metadata form the {UW} E-theses searchable database of records that link to theses in full text. The metadata are {OAI} compliant and are harvested by Theses Canada and the {ETD} Union Catalog. The E-theses Project supports authors' rights while minimizing access restrictions and encourages innovations while respecting the value of gradually evolving thesis standards and traditions. The success of the {UW} E-thesis Project illustrates that progress can be made toward the {OA} paradigm for theses and dissertations while upholding perennial values. Collaborations with like-minded organizations support and advance these goals. Originality/value -- Academic librarians and graduate studies officers will find this e-thesis project description and this discussion of issues relevant to planning and maintaining electronic thesis submission and access systems at their own universities. The descriptions of the benefits of the partnerships may prompt readers to make similar connections themselves.",
	author = "Christine Jewell and William Oldfield and Sharon Reeves",
	citeulike-article-id = "6548921",
	citeulike-linkout-0 = "http://dx.doi.org/10.1108/07378830610669565",
	citeulike-linkout-1 = "http://www.emeraldinsight.com/10.1108/07378830610669565",
	doi = "10.1108/07378830610669565",
	journal = "Library Hi Tech",
	keywords = "canada, delivery, digital, document, electronic, file-import-10-01-16, libraries, theses",
	number = "2",
	pages = "183--196",
	posted-at = "2010-01-16 01:43:14",
	priority = "2",
	title = "{University of Waterloo} electronic theses: issues and partnerships",
	url = "http://dx.doi.org/10.1108/07378830610669565",
	volume = "24",
	year = "2006"
}

@Article{ citeulike:5153351,
	abstract = "Purpose: To explain the background, functionality, and content of the CARL metadata harvester and search service, http://carl-abrc-oai.lib.sfu.ca/, and to outline plans for improving the service. Design/methodology/approach: This case study employs simple statistical analyses to a set of harvested metadata. Findings: This paper documents the use of unqualified Dublin Core (uDC) elements in the metadata harvested from the repositories participating in the CARL harvester, and identifies patterns in the use of that metadata. It also compares these findings with a similar study, and identifies areas for further research. Research limitations/implications: This paper is limited to discussion of the characteristics of a relatively small set of metadata collected using the Open Archives Initiative Protocol for Metadata Harvesting. However, analyses reveal some patterns in the use of this metadata that are valuable in the development of best practices for repository implementers. Practical implications: This paper documents the use of uDC elements by a specific community. Its findings will form a basis for developing mechanisms for improving the effectiveness of the metadata generated by that community and therefore the services built around that metadata. Originality/value: While there are several other studies that take an approach similar to that taken in this paper, no one has yet studied this specific data set. More generally, this paper contributes a valuable case study to research on the implementation of the Open Archives Initiative Protocol for Metadata Harvesting. (Author abstract)",
	author = "Mark Jordan",
	citeulike-article-id = "5153351",
	citeulike-linkout-0 = "http://dx.doi.org/10.1108/07378830610669574",
	doi = "10.1108/07378830610669574",
	journal = "Library Hi Tech",
	number = "2",
	pages = "197--210",
	posted-at = "2010-01-15 15:03:45",
	priority = "2",
	title = "The {CARL} metadata harvester and search service",
	url = "http://dx.doi.org/10.1108/07378830610669574",
	volume = "24",
	year = "2006"
}

@Article{ citeulike:3906895,
	abstract = "The authors demonstrate how to use Semantic Web technologies to improve the state-of-the-art in online learning environments and bridge the gap between students on the one hand, and authors or teachers on the other. The ontological framework presented here helps formalize learning object context as a complex interplay of different learning-related elements and shows how we can use semantic annotation to interrelate diverse learning artifacts. On top of this framework, the authors implemented several feedback channels for educators to improve the delivery of future Web-based courses.",
	address = "Los Alamitos, CA, USA",
	author = "Jelena Jovanovi{\'c} and Dragan Ga{\v{s}}evi{\'c} and Christopher Brooks and Vladan Deved{\v{z}}i{\'c} and Marek Hatala and Timmy Eap and Griff Richards",
	citeulike-article-id = "3906895",
	doi = "10.1109/MIC.2007.116",
	issn = "1089-7801",
	journal = "IEEE Internet Computing",
	keywords = "learning\_content, semantic\_web",
	number = "5",
	pages = "45--53",
	posted-at = "2009-06-30 07:19:55",
	priority = "2",
	publisher = "IEEE Computer Society",
	title = "Using {Semantic Web} Technologies to Analyze Learning Content",
	url = "http://dx.doi.org/10.1109/MIC.2007.116",
	volume = "11",
	year = "2007",
	file = "{:references:metadata.autogen:autogen07.pdf|}"
}

% NOTE(review): year set to 1999 to agree with the attached file name (jekins.1999.cn...); the export said 2003 -- confirm against the publisher record.
@Article{ citeulike:5894543,
	author = "Charlotte Jenkins and Mike Jackson and Peter Burden and Jon Wallis",
	citeulike-article-id = "5894543",
	citeulike-linkout-0 = "http://74.125.155.132/scholar?q=cache:6NBDemQboWwJ:scholar.google.com/+dewey+decimal+system\&hl=en",
	day = "8",
	journal = "Computer Networks",
	keywords = "metadata, metadata\_generation, rdf",
	month = "May",
	pages = "1305--1320",
	posted-at = "2010-01-15 02:58:35",
	priority = "3",
	title = "Automatic {RDF} Metadata Generation for Resource Discovery",
	url = "http://74.125.155.132/scholar?q=cache:6NBDemQboWwJ:scholar.google.com/+dewey+decimal+system&hl=en",
	volume = "31",
	year = "1999",
	file = "{:references:metadata.autogen:jekins.1999.cn.rdf\_metadata\_autogen.pdf|}"
}

% A single conference paper (it has authors and a page range), hence InProceedings with the venue in booktitle.
@InProceedings{ 10.1109/HICSS.2001.927053,
	author = "D. McKnight and N. L. Chervany",
	title = "Conceptualizing Trust: A Typology and {E-Commerce} Customer Relationships Model",
	booktitle = "Hawaii International Conference on System Sciences",
	volume = "7",
	issn = "1530-1605",
	year = "2001",
	pages = "7022--7031",
	doi = "10.1109/HICSS.2001.927053",
	publisher = "IEEE Computer Society",
	address = "Los Alamitos, CA, USA"
}

@Article{ citeulike:6654507,
	abstract = "Websites that provide content creation and sharing features have become quite popular recently. These sites allow users to categorize and browse content using `tags' or free-text keyword topics. Since users contribute and tag social media content across a variety of social web platforms, creating new knowledge from distributed tag data has become a matter of performing various tasks, including publishing, aggregating, integrating, and republishing tag data. However, there are a number of issues in relation to data sharing and interoperability when processing tag data across heterogeneous tagging platforms. In this paper we introduce a semantic tag model that aims to explicitly offer the necessary structure, semantics and relationships between tags. This approach provides an improved opportunity for representing tag data in the form of reusable constructs at a semantic level. We also demonstrate a prototype that consumes and makes use of shared tag metadata across heterogeneous sources.",
	author = "Hak-Lae Kim and Stefan Decker and John G. Breslin",
	citeulike-article-id = "6654507",
	citeulike-linkout-0 = "http://dx.doi.org/10.1177/0165551509346785",
	citeulike-linkout-1 = "http://jis.sagepub.com/cgi/content/abstract/36/1/57",
	day = "1",
	doi = "10.1177/0165551509346785",
	journal = "Journal of Information Science",
	keywords = "semantics",
	month = "February",
	number = "1",
	pages = "57--72",
	posted-at = "2010-02-11 16:44:59",
	priority = "5",
	title = "Representing and sharing folksonomies with semantics",
	url = "http://dx.doi.org/10.1177/0165551509346785",
	volume = "36",
	year = "2010",
	file = "{:references:metadata.semantics:kim.2010.jis.tag\_semantics.pdf|}"
}

@Article{ citeulike:6554105,
	abstract = {Digital collections often foster a large number of digital resources that need to be efficiently managed, described and disseminated. Metadata play a key role in these tasks as they offer the basis upon which more advanced services can be built. However, it is not always the case that such collections' metadata expose explicit or even well-structured semantics. Ways to bridge this "semantic gap" are increasingly being sought, as our review of the current state-of-the-art reveals. Most importantly though, in this paper we comment on two well-known metadata standards, popular in cultural heritage applications, namely {CIDOC-CRM} and Dublin Core; as diverse their scope may be, we nevertheless show how applications can benefit from a transition to explicit semantic structures in these domains, in a way as painless as possible and conformant to Semantic Web standards. We conclude by presenting a concrete, prototype implementation that serves as a proof-of-concept about the ideas argued for.},
	author = "Dimitrios A. Koutsomitropoulos and Georgia D. Solomou and Theodore S. Papatheodorou",
	citeulike-article-id = "6554105",
	citeulike-linkout-0 = "http://journals.tdl.org/jodi/article/viewArticle/693",
	journal = "Journal of Digital Information",
	keywords = "file-import-10-01-17",
	number = "6",
	posted-at = "2010-01-17 22:38:33",
	priority = "2",
	title = "Metadata and Semantics in Digital Object Collections: A Case-Study on {CIDOC-CRM} and {Dublin Core} and a Prototype Implementation",
	url = "http://journals.tdl.org/jodi/article/viewArticle/693",
	volume = "10",
	year = "2009",
	file = "{:references:archive.ontology:koutsomitropoulos.2009.jodi.cidoc\_crm\_dc.pdf|}"
}

% NOTE(review): publisher inferred from the Springer DOI prefix (10.1007) and the springerlink linkout; confirm the exact imprint.
@InCollection{ citeulike:6235902,
	abstract = "Ontologies on the Semantic Web form a basis for representing human-conceivable knowledge in a machine-understandable manner. Ontology development for a specific knowledge domain is however a difficult task, because the produced representation has to be adequately detailed and broad enough at the same time. The CIDOC-CRM is such an ontology, pertaining to cultural heritage, which we align to the Semantic Web environment: first transforming it to OWL and then profiling it not in the usual flat metadata sense, but by refining and extending its conceptual structures, taking advantage of OWL semantics. This kind of profiling maintains applicability of the model, while enabling more expressive reasoning tasks. To this end, we construct a mechanism for acquiring implied and web-distributed information that is used to conduct and present a series of experimental inferences on the CRM profiled form.",
	author = "Dimitrios A. Koutsomitropoulos and George E. Paloukis and Theodore S. Papatheodorou",
	booktitle = "Metadata and Semantics",
	citeulike-article-id = "6235902",
	citeulike-linkout-0 = "http://dx.doi.org/10.1007/978-0-387-77745-0\\_3",
	citeulike-linkout-1 = "http://www.springerlink.com/content/m68460003gu7772p",
	doi = "10.1007/978-0-387-77745-0\_3",
	pages = "23--33",
	posted-at = "2010-01-16 02:27:49",
	priority = "2",
	publisher = "Springer",
	title = "Semantic Application Profiles: A Means to Enhance Knowledge Discovery in Domain Metadata Models",
	url = "http://dx.doi.org/10.1007/978-0-387-77745-0_3",
	year = "2009",
	file = "{:references:archive.ontology:koutsomitropoulos.2009.semantic\_application\_profile.pdf|}"
}

@Article{ citeulike:1216193,
	address = "Secaucus, NJ, USA",
	author = "Latifur Khan and Dennis McLeod and Eduard Hovy",
	citeulike-article-id = "1216193",
	doi = "10.1007/s00778-003-0105-1",
	issn = "1066-8888",
	journal = "The VLDB Journal",
	keywords = "ontology",
	month = "January",
	number = "1",
	pages = "71--85",
	posted-at = "2009-06-30 11:52:57",
	priority = "2",
	publisher = "Springer-Verlag New York, Inc.",
	title = "Retrieval effectiveness of an ontology-based model for information selection",
	url = "http://dx.doi.org/10.1007/s00778-003-0105-1",
	volume = "13",
	year = "2004",
	file = "{:references:metadata.autogen:autogen10.pdf|}"
}

@Article{ citeulike:328465,
	author = "Latifur Khan and Dennis McLeod and Eduard Hovy",
	citeulike-article-id = "328465",
	doi = "10.1007/s10844-005-0188-9",
	issn = "0925-9902",
	journal = "Journal of Intelligent Information Systems",
	keywords = "ontology",
	month = "September",
	number = "2",
	pages = "181--205",
	posted-at = "2009-06-30 11:50:38",
	priority = "2",
	publisher = "Kluwer Academic Publishers",
	title = "A Framework for Effective Annotation of Information from Closed Captions Using Ontologies",
	url = "http://dx.doi.org/10.1007/s10844-005-0188-9",
	volume = "25",
	year = "2005",
	file = "{:references:metadata.autogen:autogen09.pdf|}"
}

@phdthesis{ citeulike:4889962,
	author = {Michel Klein},
	title = {Change Management for Distributed Ontologies},
	school = {Vrije Universiteit Amsterdam},
	month = {August},
	year = {2004},
	url = {http://www.cs.vu.nl/~mcaklein/thesis/},
	citeulike-article-id = {4889962},
	citeulike-linkout-0 = {http://www.cs.vu.nl/~mcaklein/thesis/},
	posted-at = {2009-11-25 14:37:08},
	priority = {2}
}

@incollection{ citeulike:5800838,
	author = {S. A. Knight and A. Spink},
	title = {Toward a Web Search Information Behavior Model},
	booktitle = {Web Search},
	editor = {A. Spink and M. Zimmer},
	pages = {209--234},
	publisher = {Springer Berlin Heidelberg},
	year = {2008},
	issn = {1568-1300},
	doi = {10.1007/978-3-540-75829-7\_12},
	url = {http://dx.doi.org/10.1007/978-3-540-75829-7_12},
	keywords = {information\_behaviour, model},
	citeulike-article-id = {5800838},
	citeulike-linkout-0 = {http://dx.doi.org/10.1007/978-3-540-75829-7\\_12},
	posted-at = {2009-09-18 01:08:52},
	priority = {3},
	file = {{:references:information.behaviour:web.search.information.behavior.model.pdf|}}
}

% Year and month follow the issue date encoded in the DOI, "(199106)", and the 1991 copyright line in the abstract.
@Article{ citeulike:221379,
	abstract = "The article discusses the users' perspective of information seeking. A model of the information search process is presented derived from a series of five studies investigating common experiences of users in information seeking situations. The cognitive and affective aspects of the process of information seeking suggest a gap between the users' natural process of information use and the information system and intermediaries' traditional patterns of information provision. {\copyright} 1991 John Wiley \& Sons, Inc.",
	author = "Carol C. Kuhlthau",
	citeulike-article-id = "221379",
	citeulike-linkout-0 = "http://dx.doi.org/10.1002/(SICI)1097-4571(199106)42:5%3C361::AID-ASI6%3E3.0.CO;2-%23",
	citeulike-linkout-1 = "http://www3.interscience.wiley.com/cgi-bin/abstract/10049443/ABSTRACT",
	doi = "10.1002/(SICI)1097-4571(199106)42:5%3C361::AID-ASI6%3E3.0.CO;2-%23",
	issn = "1097-4571",
	journal = "Journal of the American Society for Information Science",
	keywords = "informationh\_behavior, model",
	month = "June",
	number = "5",
	pages = "361--371",
	posted-at = "2009-09-17 14:36:15",
	priority = "3",
	title = "Inside the search process: Information seeking from the user's perspective",
	url = "http://dx.doi.org/10.1002/(SICI)1097-4571(199106)42:5%3C361::AID-ASI6%3E3.0.CO;2-%23",
	volume = "42",
	year = "1991",
	file = "{:references:information.behaviour:kuhlthau.inside.the.search.process.pdf|}"
}

@book{ citeulike:5781773,
	author = {Sebastian R. Kruk and W. D. McDaniel},
	title = {Semantic digital libraries},
	publisher = {Springer},
	year = {2009},
	isbn = {9783540854333},
	url = {http://www.worldcat.org/isbn/9783540854333},
	keywords = {digital, library, semantic},
	citeulike-article-id = {5781773},
	citeulike-linkout-0 = {http://www.worldcat.org/isbn/9783540854333},
	citeulike-linkout-1 = {http://books.google.com/books?vid=ISBN9783540854333},
	citeulike-linkout-2 = {http://www.amazon.com/gp/search?keywords=9783540854333\&index=books\&linkCode=qs},
	citeulike-linkout-3 = {http://www.librarything.com/isbn/9783540854333},
	citeulike-linkout-4 = {http://www.worldcat.org/oclc/243822188},
	posted-at = {2009-09-14 20:08:58},
	priority = {2},
	file = {{:books:semantic.digital.library:front-matter.pdf|}}
}

% NOTE(review): the doi/url below carry a Wiley "asi" identifier, which does not look like it belongs to this
% 1926 Washington Academy of Sciences article -- confirm that they resolve to the intended paper.
@Article{ citeulike:3852086,
	author = "Alfred J. Lotka",
	citeulike-article-id = "3852086",
	citeulike-linkout-0 = "http://dx.doi.org/10.1002/asi.4630280610",
	doi = "10.1002/asi.4630280610",
	journal = "Journal of the Washington Academy of Sciences",
	keywords = "bibliometrics",
	pages = "317--324",
	posted-at = "2010-04-07 04:25:57",
	priority = "2",
	title = "The frequency distribution of scientific productivity",
	url = "http://dx.doi.org/10.1002/asi.4630280610",
	volume = "16",
	year = "1926"
}

@Book{ citeulike:813763,
	abstract = "{This fully revised and updated second edition of Understanding Digital Libraries focuses on the challenges faced by both librarians and computer scientists in a field that has been dramatically altered by the growth of the Web. At every turn, the goal is practical: to show you how things you might need to do are already being done, or how they can be done. The first part of the book is devoted to technology and examines issues such as varying media requirements, indexing and classification, networks and distribution, and presentation. The second part of the book is concerned with the human contexts in which digital libraries function. Here you'll find specific and useful information on usability, preservation, scientific applications, and thorny legal and economic questions. Useful for digital library projects in all kinds of settings, including commercial and community ventures, museums, research institutions, and schools. Covers the entire spectrum of media, including text, all kinds of images, audio, and video. Provides practical advice on achieving the best of what is possible while avoiding common pitfalls. Filled with case studies and references to valuable outside resources.}",
	author = "Michael Lesk",
	citeulike-article-id = "813763",
	citeulike-linkout-0 = "http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20\&path=ASIN/1558609245",
	citeulike-linkout-1 = "http://www.amazon.de/exec/obidos/redirect?tag=citeulike01-21\&path=ASIN/1558609245",
	citeulike-linkout-2 = "http://www.amazon.fr/exec/obidos/redirect?tag=citeulike06-21\&path=ASIN/1558609245",
	citeulike-linkout-3 = "http://www.amazon.co.uk/exec/obidos/ASIN/1558609245/citeulike00-21",
	citeulike-linkout-4 = "http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20\&path=ASIN/1558609245",
	citeulike-linkout-5 = "http://www.worldcat.org/isbn/1558609245",
	citeulike-linkout-6 = "http://books.google.com/books?vid=ISBN1558609245",
	citeulike-linkout-7 = "http://www.amazon.com/gp/search?keywords=1558609245\&index=books\&linkCode=qs",
	citeulike-linkout-8 = "http://www.librarything.com/isbn/1558609245",
	citeulike-linkout-9 = "http://www.worldcat.org/oclc/56911889",
	day = "02",
	edition = "Second",
	howpublished = "Paperback",
	isbn = "1558609245",
	keywords = "digital\_library",
	month = "December",
	posted-at = "2010-04-20 20:57:09",
	priority = "0",
	publisher = "Elsevier",
	title = "Understanding digital libraries",
	url = "http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/1558609245",
	year = "2004"
}

@Article{ citeulike:4981688,
	abstract = "Reports results of a study to examine interindexer consistency (the degree to which indexers, when assigning terms to a chosen record, will choose the same terms to reflect that record) in the PsycINFO database using 60 records that were inadvertently processed twice between 1996 and 1998. Five aspects of interindexer consistency were analysed. Two methods were used to calculate interindexer consistency: one posited by Hooper (1965) and the other by Rollin (1981). Aspects analysed were: checktag consistency (66.24\% using Hooper's calculation and 77.17\% using Rollin's); major-to-all term consistency (49.31\% and 62.59\% respectively); overall indexing consistency (49.02\% and 63.32\%); classification code consistency (44.17\% and 45.00\%); and major-to-major term consistency (43.24\% and 56.09\%). The average consistency across all categories was 50.4\% using Hooper's method and 60.83\% using Rollin's. Although comparison with previous studies is difficult due to methodological variations in the overall study of indexing consistency and the specific characteristics of the database, results generally support previous findings when trends and similar studies are analysed.",
	author = "Kurt Leininger",
	citeulike-article-id = "4981688",
	citeulike-linkout-0 = "http://dx.doi.org/10.1177/096100060003200102",
	citeulike-linkout-1 = "http://lis.sagepub.com/cgi/content/abstract/32/1/4",
	day = "1",
	doi = "10.1177/096100060003200102",
	journal = "Journal of Librarianship and Information Science",
	keywords = "interindexer\_consistency",
	month = "March",
	number = "1",
	pages = "4--8",
	posted-at = "2010-03-14 00:15:27",
	priority = "2",
	title = "Interindexer consistency in {PsycINFO}",
	url = "http://dx.doi.org/10.1177/096100060003200102",
	volume = "32",
	year = "2000"
}

@InProceedings{ 4721453,
	author = "Yaobin Lu and Ling Zhao and Bin Wang",
	booktitle = "{IEEE} Symposium on Advanced Management of Information for Globalized Enterprises ({AMIGE} 2008)",
	title = "Exploring Factors Affecting Trust and Purchase Behavior in Virtual Communities",
	year = "2008",
	month = sep,
	pages = "1--5",
	keywords = "consumer purchase decision-making process, consumers' behaviors, information intention, purchase behavior, purchase intention, trust building mechanisms, virtual communities, decision making, information networks, purchasing",
	doi = "10.1109/AMIGE.2008.ECP.11"
}

@article{citeulike:6551058,
	author               = {Rebecca L. Lubas},
	title                = {Defining Best Practices in Electronic Thesis and Dissertation Metadata},
	journal              = {Journal of Library Metadata},
	volume               = {9},
	number               = {3},
	pages                = {252--263},
	year                 = {2009},
	publisher            = {Routledge},
	doi                  = {10.1080/19386380903405165},
	url                  = {http://dx.doi.org/10.1080/19386380903405165},
	keywords             = {etd, metadata},
	abstract             = {The University of New Mexico will mandate in 2009 that theses and dissertations be submitted in electronic form as the copy of record. These documents will reside in the university's digital repository, operated on a DSpace platform. This article reviews practices for thesis and dissertation metadata creation with a focus on DSpace instances, best practice recommendations for author-submitted metadata, recommendations for subject analysis, and training for metadata practitioners. The article recommends processes for author submission, metadata quality control and enhancement, and crosswalking of the metadata to the library's catalog to maximize discovery.},
	citeulike-article-id = {6551058},
	citeulike-linkout-0  = {http://dx.doi.org/10.1080/19386380903405165},
	posted-at            = {2010-01-16 17:03:40},
	priority             = {2}
}

@InProceedings{ 1298431,
	address = "New York, NY, USA",
	author = "Yuangui Lei and Victoria Uren and Enrico Motta",
	booktitle = "K-CAP '07: Proceedings of the 4th international conference on Knowledge capture",
	citeulike-article-id = "6544743",
	citeulike-linkout-0 = "http://dx.doi.org/10.1145/1298406.1298431",
	doi = "10.1145/1298406.1298431",
	location = "Whistler, BC, Canada",
	pages = "135--142",
	posted-at = "2010-01-15 17:40:03",
	priority = "2",
	publisher = "ACM",
	title = "A framework for evaluating semantic metadata",
	url = "http://dx.doi.org/10.1145/1298406.1298431",
	year = "2007"
}

@Article{ citeulike:6544603,
	author = "Elizabeth D. Liddy",
	citeulike-article-id = "6544603",
	citeulike-linkout-0 = "http://dx.doi.org/10.1016/0306-4573(91)90031-G",
	citeulike-linkout-1 = "http://www.sciencedirect.com/science/article/B6VC8-469PG60-X/2/e7c57a99a38b2760902b9b5315edc224",
	doi = "10.1016/0306-4573(91)90031-G",
	journal = "Information Processing \& Management",
	number = "1",
	pages = "55--81",
	posted-at = "2010-01-15 17:03:51",
	priority = "2",
	title = "The discourse-level structure of empirical abstracts: an exploratory study",
	url = "http://dx.doi.org/10.1016/0306-4573(91)90031-G",
	volume = "27",
	year = "1991"
}

@InProceedings{ citeulike:1372858,
	abstract = "The poster reports on a project in which we are investigating methods for breaking the human metadata-generation bottleneck that plagues Digital Libraries. The research question is whether metadata elements and values can be automatically generated from the content of educational resources, and correctly assigned to mathematics and science educational materials. Natural Language Processing and Machine Learning techniques were implemented to automatically assign values of the GEM metadata element set to learning resources provided by the Gateway for Education (GEM), a service that offers web access to a wide range of educational materials. In a user study, education professionals evaluated the metadata assigned to learning resources by either automatic tagging or manual assignment. Results show minimal difference in the eyes of the evaluators between automatically generated metadata and manually assigned metadata.",
	address = "New York, NY, USA",
	author = "Elizabeth D. Liddy and Eileen Allen and Sarah Harwell and Susan Corieri and Ozgur Yilmazel and Ercan N. Ozgencil and Anne Diekema and Nancy McCracken and Joanne Silverstein and Stuart Sutton",
	booktitle = "SIGIR '02: Proceedings of the 25th annual international ACM SIGIR conference on Research and development in information retrieval",
	citeulike-article-id = "1372858",
	doi = "10.1145/564376.564464",
	isbn = "1581135610",
	keywords = "ao, automatic\_metadata\_generation, evaluation",
	pages = "401--402",
	posted-at = "2009-06-30 16:48:50",
	priority = "2",
	publisher = "ACM Press",
	title = "Automatic metadata generation \& evaluation",
	url = "http://dx.doi.org/10.1145/564376.564464",
	year = "2002"
}

@Article{ citeulike:2640808,
	abstract = "Most information retrieval research focuses collecting documents that match the same set of concepts. This study considers a more advanced problem, namely how to discover knowledge not contained in a single source from combined historical facts. By using a well-designed core ontology in the cultural domain (CIDOC CRM, ISO21127), this study discusses the requirement for a robust inference platform for real-life knowledge discovery and integration over distributed sources. The methodology and design are justified in detail through functional requirements for an inference service with the capability of inferring new knowledge from combinations of facts distributed over different sources. A number of critical issues for developing such a robust inference platform are identified, namely (1) systematic accumulation of common concepts and inference rules; (2) extending the ontology with metaclasses; (3) accumulation of factual and categorical knowledge; (4) incorporation of fuzzy inference into the inference engine, and (5) improvement of performance and scalability in the inference engine.",
	author = "Chia-Hung Lin and Jen-Shin Hong and Martin Doerr",
	citeulike-article-id = "2640808",
	doi = "10.1007/s00799-008-0034-0",
	journal = "International Journal on Digital Libraries",
	keywords = "cidoc, cidoc\_crm",
	month = "April",
	number = "2",
	pages = "115--132",
	posted-at = "2009-06-30 16:08:42",
	priority = "2",
	title = "Issues in an inference platform for generating deductive knowledge: a case study in cultural heritage digital libraries using the {CIDOC CRM}",
	url = "http://dx.doi.org/10.1007/s00799-008-0034-0",
	volume = "8",
	year = "2008"
}

@Article{ citeulike:4199625,
	abstract = "Current data integration approaches by bioinformaticians frequently involve extracting data from a wide variety of public and private data repositories, each with a unique vocabulary and schema, via scripts. These separate data sets must then be normalized through the tedious and lengthy process of resolving naming differences and collecting information into a single view. Attempts to consolidate such diverse data using data warehouses or federated queries add significant complexity and have shown limitations in flexibility. The alternative of complete semantic integration of data requires a massive, sustained effort in mapping data types and maintaining ontologies. We focused instead on creating a data architecture that leverages semantic mapping of experimental metadata, to support the rapid prototyping of scientific discovery applications with the twin goals of reducing architectural complexity while still leveraging semantic technologies to provide flexibility, efficiency and more fully characterized data relationships. A metadata ontology was developed to describe our discovery process. A metadata repository was then created by mapping metadata from existing data sources into this ontology, generating RDF triples to describe the entities. Finally an interface to the repository was designed which provided not only search and browse capabilities but complex query templates that aggregate data from both RDF and RDBMS sources. We describe how this approach (i) allows scientists to discover and link relevant data across diverse data sources and (ii) provides a platform for development of integrative informatics applications.",
	author = "Maurice Manning and Amit Aggarwal and Kevin Gao and Greg Tucker-Kellogg",
	citeulike-article-id = "4199625",
	citeulike-linkout-0 = "http://dx.doi.org/10.1093/bib/bbp007",
	citeulike-linkout-1 = "http://bib.oxfordjournals.org/cgi/content/abstract/10/2/164?etoc",
	citeulike-linkout-2 = "http://view.ncbi.nlm.nih.gov/pubmed/19304872",
	citeulike-linkout-3 = "http://www.hubmed.org/display.cgi?uids=19304872",
	day = "1",
	doi = "10.1093/bib/bbp007",
	issn = "1477-4054",
	journal = "Briefings in Bioinformatics",
	keywords = "metadata\_semantics, semantic\_integration",
	month = "March",
	number = "2",
	pages = "164--176",
	posted-at = "2010-01-25 18:46:11",
	priority = "2",
	title = "Scaling the walls of discovery: using semantic metadata for integrative problem solving",
	url = "http://dx.doi.org/10.1093/bib/bbp007",
	volume = "10",
	year = "2009"
}

@Article{ citeulike:6551063,
	abstract = "This study assesses the current metadata practices and trends in Association of Research Libraries (ARL) libraries, based on the survey Metadata conducted in spring 2007 (SPEC Kit 298: Metadata), a collaborative effort with the staff at the ARL. The survey investigates how metadata has been implemented in ARL member libraries: what kinds of projects or initiatives have been undertaken, what types of digital objects are associated with metadata, who are creating metadata, what schemas and tools are used to create and manage metadata, and the organizational changes and challenges resulting from the adoption of metadata in the libraries. The author summarizes her observations of the findings and the main themes that emerged from the metadata practices in libraries. She assesses the changing context of metadata creation and management and the evolution of metadata workflow and best practices in libraries. The author also discusses the roles and responsibilities of metadata professionals and the implications of metadata practices for the library and information community.",
	author = "Jin Ma",
	citeulike-article-id = "6551063",
	citeulike-linkout-0 = "http://dx.doi.org/10.1080/19386380903094977",
	doi = "10.1080/19386380903094977",
	journal = "Journal of Library Metadata",
	keywords = "metadata, survey",
	number = "1",
	pages = "1--14",
	posted-at = "2010-01-16 17:06:50",
	priority = "2",
	publisher = "Routledge",
	title = "Metadata in {ARL} Libraries: A Survey of Metadata Practices",
	url = "http://dx.doi.org/10.1080/19386380903094977",
	volume = "9",
	year = "2009"
}

@inproceedings{citeulike:6545158,
	author               = {Thomas Margaritopoulos and Merkourios Margaritopoulos and Ioannis Mavridis and Athanasios Manitsaris},
	title                = {A conceptual framework for metadata quality assessment},
	booktitle            = {DCMI '08: Proceedings of the 2008 International Conference on Dublin Core and Metadata Applications},
	pages                = {104--113},
	year                 = {2008},
	publisher            = {Dublin Core Metadata Initiative},
	location             = {Berlin, Germany},
	keywords             = {file-import-10-01-15},
	citeulike-article-id = {6545158},
	posted-at            = {2010-01-15 20:17:08},
	priority             = {2},
	file                 = {{:references:metadata.quality:margaritopoulos\_etal.2008.conceptual.framework.metadata.quality.pdf|}}
}

@Article{ citeulike:4072081,
	abstract = "This article introduces the Simple Knowledge Organisation System (SKOS), a Semantic Web language for representing controlled structured vocabularies, including thesauri, classification schemes, subject heading systems, and taxonomies. SKOS provides a framework for publishing thesauri, classification schemes, and subject indexes on the Web, and for applying these systems to resource collections that are part of the Semantic Web. Semantic Web applications may harvest and merge SKOS data, to integrate and enhance retrieval service across multiple collections (e.g., libraries). This article also describes some alternatives for integrating Semantic Web services based on the Resource Description Framework (RDF) and SKOS into a distributed enterprise architecture.",
	author = {Alistair Miles and Jos{\'e} R. P{\'e}rez-Ag{\"u}era},
	citeulike-article-id = "4072081",
	citeulike-linkout-0 = "http://dx.doi.org/10.1300/J104v43n03\_04",
	doi = "10.1300/J104v43n03\_04",
	journal = "Cataloging \& Classification Quarterly",
	number = "3",
	pages = "69--83",
	posted-at = "2010-01-15 14:29:45",
	priority = "2",
	publisher = "Routledge",
	title = "{SKOS}: Simple Knowledge Organisation for the {Web}",
	url = "http://dx.doi.org/10.1300/J104v43n03_04",
	volume = "43",
	year = "2007"
}

@MastersThesis{ citeulike:2295482,
	abstract = "A primary motivation for the development of the Semantic Web has been the need for effective information retrieval systems which may be realised through vocabulary control and the use of structured metadata. The technological framework of the Web (URI, HTTP, XML) and of the Semantic Web (RDF, OWL, SPARQL) provides a platform upon which distributed data and metadata applications may be constructed, but does not in itself provide any direct support for information retrieval applications per se. Widely applicable Semantic Web languages that extend this basic layer and provide generic support for retrieval applications, in addition to good practice guidelines and design patterns for developing such applications, are required. The ultimate purpose of this report is to develop a formal theory of retrieval using controlled vocabularies that have a simple and intuitive structure, to provide the necessary theoretical foundations for the development of Semantic Web languages and design patterns for distributed retrieval applications. The main body of this report is devoted to the articulation of such a theory. The theory is expressed formally through the use of mathematical notation, with the intention that this level of formality will provide the bridge between informal requirements specifications and the implementation of effective retrieval applications in computer systems. Specifically, a theory is developed to describe the ways in which a structured vocabulary may be used to construct an index over a collection of objects and then used to express queries which may be evaluated against an index to obtain a set of results. This theory is extended to consider ways in which both the precision and recall of retrieval strategies may be improved, through the use of expansion and ranking techniques and through ``coordination''. The problem of translating between controlled vocabularies is also considered. 
The theory attempts to formalise, unify and extend the traditional wisdom of the library sciences regarding the use of thesauri, classification schemes, subject heading systems, taxonomies and other types of structured vocabulary, so that proven techniques and methodologies may be transferred to a Semantic Web context. The recently chartered W3C Semantic Web Deployment Working Group has been charged with the development of the Simple Knowledge Organisation System (SKOS) to W3C Recommendation status. SKOS is a Semantic Web language specifically intended to support information retrieval applications using controlled vocabularies that have a relatively simple structure. A formal requirements specification is the first planned deliverable in the standardisation of SKOS. An immediate goal of this report is to provide a level of abstraction that can be used to perform a comparative analysis of use cases involving information retrieval systems that operate with structured vocabularies, so that the requirements of these systems with respect to Semantic Web languages such as SKOS may be clearly determined. Also, this report suggests ways in which the theory may be mapped to concrete language constructs and representation patterns in Semantic Web languages. In so doing it is hoped that the development of SKOS and similar languages may be grounded with sufficient rigour to ensure their wide applicability and consistent use.",
	author = "Alistair Miles",
	citeulike-article-id = "2295482",
	citeulike-linkout-0 = "http://isegserv.itd.rl.ac.uk/retrieval/",
	posted-at = "2010-01-15 14:13:58",
	priority = "2",
	title = "Retrieval and the {Semantic Web}: A Theory of Retrieval Using Structured Vocabularies",
	url = "http://isegserv.itd.rl.ac.uk/retrieval/",
	year = "2006",
	review-note = "NOTE(review): the school field, required for MastersThesis entries, is not recorded here; add the awarding institution once confirmed"
}

@Article{ citeulike:5944328,
	abstract = "Digital libraries (DLs) are complex information systems which can present changes in their structure, content, and services. These complexities and dynamics make system maintenance a non-trivial task, since it requires periodical evaluation of the different DL components. Generally, these evaluations are customized per system and are performed only when problems occur and administrator intervention is required. This work aims to change the situation. We present 5SQual, a tool which provides ways to perform automatic and configurable evaluations of some of the most important DL components, among them, digital objects, metadata, and services. The tool implements diverse numeric indicators that are associated with eight quality dimensions described in the 5S quality model. Its generic architecture was developed to be applicable to various DLs and scenarios. In sum, the main contributions of this work include: (i) the design and implementation of 5SQual, a tool that validates a theoretical DL quality model; (ii) the demonstration of the applicability of the tool in several usage scenarios; and (iii) the evaluation (with usability specialists) of its graphical interface specially designed to guide the configuration of 5SQual evaluations. We also present the results of interviews conducted with administrators of real DLs regarding their expectations and opinions about 5SQual.",
	author = "B{\'a}rbara L. Moreira and Marcos A. Gon\c{c}alves and Alberto H. F. Laender and Edward A. Fox",
	citeulike-article-id = "5944328",
	citeulike-linkout-0 = "http://dx.doi.org/10.1016/j.joi.2008.12.003",
	citeulike-linkout-1 = "http://linkinghub.elsevier.com/retrieve/pii/S1751157708000734",
	doi = "10.1016/j.joi.2008.12.003",
	issn = "1751-1577",
	journal = "Journal of Informetrics",
	keywords = "digital\_library, evaluation",
	month = "April",
	number = "2",
	pages = "102--123",
	posted-at = "2010-01-15 14:03:03",
	priority = "2",
	title = "Automatic evaluation of digital libraries with {5SQual}",
	url = "http://dx.doi.org/10.1016/j.joi.2008.12.003",
	volume = "3",
	year = "2009"
}

@Article{ citeulike:937809,
	abstract = "Metadata plays a key role in digital projects. This article introduces the six steps of metadata implementation for digital projects at the Pennsylvania State (Penn State) University Libraries: analyzing metadata requirements, adopting metadata schemes, creating metadata content, delivery and access, evaluation of metadata, and sustaining metadata maintenance. An array of technical, managerial, and organizational questions and issues of metadata implementation are discussed in the context of digital initiatives. The author proposes a coordinated metadata management approach based upon Penn State's experiences and best practices.",
	author = "Jin Ma",
	citeulike-article-id = "937809",
	doi = "10.1016/j.lcats.2006.07.001",
	journal = "Library Collections, Acquisitions, and Technical Services",
	keywords = "metadata",
	number = "1--2",
	pages = "3--17",
	posted-at = "2009-07-01 20:52:37",
	priority = "0",
	title = "Managing metadata for digital projects",
	url = "http://dx.doi.org/10.1016/j.lcats.2006.07.001",
	volume = "30",
	year = "2006",
	file = "{:references:ma.2006.lcats.manage.metadata.digital.objects.pdf|}"
}

@book{citeulike:2709781,
	author               = {Christopher D. Manning and Prabhakar Raghavan and Hinrich Sch{\"u}tze},
	title                = {Introduction to Information Retrieval},
	publisher            = {Cambridge University Press},
	year                 = {2008},
	month                = {July},
	isbn                 = {0521865719},
	howpublished         = {Hardcover},
	url                  = {http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/0521865719},
	keywords             = {information\_retrieval},
	abstract             = {{Introduction to Information Retrieval is the first textbook with a coherent treatment of classical and web information retrieval, including web search and the related areas of text classification and text clustering. Written from a computer science perspective, it gives an up-to-date treatment of all aspects of the design and implementation of systems for gathering, indexing, and searching documents; methods for evaluating systems; and an introduction to the use of machine learning methods on text collections. Designed as the primary text for a graduate or advanced undergraduate course in information retrieval, the book will also interest researchers and professionals. A complete set of lecture slides and exercises that accompany the book are available on the web.}},
	citeulike-article-id = {2709781},
	posted-at            = {2009-06-30 20:16:39},
	priority             = {2}
}

@Article{ citeulike:4214520,
	abstract = "This paper assesses the range of equivalence or mapping types required to facilitate interoperability in the context of a distributed terminology server. A detailed set of mapping types were examined, with a view to determining their validity for characterizing relationships between mappings from selected terminologies (AAT, LCSH, MeSH, and UNESCO) to the Dewey Decimal Classification (DDC) scheme. It was hypothesized that the detailed set of 19 match types proposed by Chaplan in 1995 is unnecessary in this context and that they could be reduced to a less detailed conceptually-based set. Results from an extensive mapping exercise support the main hypothesis and a generic suite of match types are proposed, although doubt remains over the current adequacy of the developing Simple Knowledge Organization System (SKOS) Core Mapping Vocabulary Specification (MVS) for inter-terminology mapping.",
	author = "Emma McCulloch and George Macgregor",
	citeulike-article-id = "4214520",
	doi = "10.1177/0165551507079130",
	journal = "Journal of Information Science",
	keywords = "mapping, terminology",
	month = "February",
	number = "1",
	pages = "70--92",
	posted-at = "2009-06-30 16:39:43",
	priority = "2",
	title = "Analysis of equivalence mapping for terminology services",
	url = "http://dx.doi.org/10.1177/0165551507079130",
	volume = "34",
	year = "2008",
	file = "{:references:terminology.registry.service:mcculloch.2008.jis.mapping\_terminology\_services.pdf|}"
}

@Article{ citeulike:284929,
	abstract = "This study evaluates the data sources and research methods used in earlier studies to rank the research productivity of Library and Information Science (LIS) faculty and schools. In doing so, the study identifies both tools and methods that generate more accurate publication count rankings as well as databases that should be taken into consideration when conducting comprehensive searches in the literature for research and curricular needs. With a list of 2,625 items published between 1982 and 2002 by 68 faculty members of 18 American Library Association- (ALA-) accredited LIS schools, hundreds of databases were searched. Results show that there are only 10 databases that provide significant coverage of the LIS indexed literature. Results also show that restricting the data sources to one, two, or even three databases leads to inaccurate rankings and erroneous conclusions. Because no database provides comprehensive coverage of the LIS literature, researchers must rely on a wide range of disciplinary and multidisciplinary databases for ranking and other research purposes. The study answers such questions as the following: Is the Association of Library and Information Science Education's (ALISE's) directory of members a reliable tool to identify a complete list of faculty members at LIS schools? How many and which databases are needed in a multifile search to arrive at accurate publication count rankings? What coverage will be achieved using a certain number of databases? Which research areas are well covered by which databases? What alternative methods and tools are available to supplement gaps among databases? Did coverage performance of databases change over time? What counting method should be used when determining what and how many items each LIS faculty and school has published? The authors recommend advanced analysis of research productivity to provide a more detailed assessment of research productivity of authors and programs.",
	author = "Lokman I. Meho and Kristina M. Spurgin",
	citeulike-article-id = "284929",
	doi = "10.1002/asi.20227",
	issn = "1532-2890",
	journal = "Journal of the American Society for Information Science and Technology",
	keywords = "rankding\_lis",
	month = "August",
	number = "12",
	pages = "1314--1331",
	posted-at = "2009-06-30 16:43:18",
	priority = "2",
	title = "Ranking the research productivity of library and information science faculty and schools: An evaluation of data sources and research methods",
	url = "http://dx.doi.org/10.1002/asi.20227",
	volume = "56",
	year = "2005",
	file = "{:references:001:meho-spurgin.pdf|}"
}

@article{citeulike:97070,
	author               = {Jens-Erik Mai},
	title                = {Analysis in indexing: document and domain centered approaches},
	journal              = {Information Processing \& Management},
	volume               = {41},
	number               = {3},
	pages                = {599--611},
	year                 = {2005},
	month                = {May},
	doi                  = {10.1016/j.ipm.2003.12.004},
	url                  = {http://dx.doi.org/10.1016/j.ipm.2003.12.004},
	keywords             = {indexers, metadata},
	abstract             = {The paper discusses the notion of steps in indexing and reveals that the document-centered approach to indexing is prevalent and argues that the document-centered approach is problematic because it blocks out context-dependent factors in the indexing process. A domain-centered approach to indexing is presented as an alternative and the paper discusses how this approach includes a broader range of analyses and how it requires a new set of actions from using this approach; analysis of the domain, users and indexers. The paper concludes that the two-step procedure to indexing is insufficient to explain the indexing process and suggests that the domain-centered approach offers a guide for indexers that can help them manage the complexity of indexing.},
	citeulike-article-id = {97070},
	citeulike-linkout-0  = {http://dx.doi.org/10.1016/j.ipm.2003.12.004},
	citeulike-linkout-1  = {http://www.sciencedirect.com/science/article/B6VC8-4BN0DSN-2/2/041a56f590f2166e0305c00d5d311a73},
	posted-at            = {2009-12-02 00:14:15},
	priority             = {2},
	file                 = {{:references:metadata.creation:mai.2005.ipm.analysis.indexing.pdf|}}
}

@InProceedings{ citeulike:621108,
	author = {Mikael Nilsson and Matthias Palm{\'e}r and Ambj{\"o}rn Naeve},
	booktitle = "Proceedings of the 11th World Wide Web Conference (WWW2002)",
	citeulike-article-id = "621108",
	citeulike-linkout-0 = "http://wwwconf.ecs.soton.ac.uk/archive/00000221/01/",
	citeulike-linkout-1 = "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.12.7652\&rep=rep1\&type=url\&i=0",
	location = "Hawaii, USA",
	month = mar,
	pages = "1--22",
	posted-at = "2010-01-15 20:23:24",
	priority = "2",
	title = "Semantic Web Metadata for e-Learning - Some Architectural Guidelines",
	url = "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.12.7652&rep=rep1&type=url&i=0",
	year = "2002"
}

@InProceedings{ citeulike:2916089,
	abstract = "We describe a Web-based metadata quality tool that provides statistical descriptions and visualisations of Dublin Core metadata harvested via the OAI protocol. The lightweight nature of development allows it to be used to gather contextualized requirements and some initial user feedback is discussed.",
	address = "New York, NY, USA",
	author = "David M. Nichols and Chu-Hsiang Chan and David Bainbridge and Dana McKay and Michael B. Twidale",
	booktitle = "JCDL '08: Proceedings of the 8th ACM/IEEE-CS joint conference on Digital libraries",
	citeulike-article-id = "2916089",
	doi = "10.1145/1378889.1378957",
	isbn = "9781595939982",
	keywords = "metadata, metadata\_quality",
	pages = "385--388",
	posted-at = "2009-06-30 12:31:03",
	priority = "2",
	publisher = "ACM",
	title = "A lightweight metadata quality tool",
	url = "http://dx.doi.org/10.1145/1378889.1378957",
	year = "2008",
	file = "{:references:dspace.metadata.quality.canada:oai\_metadata\_quality.pdf|}"
}

@TechReport{ citeulike:3470246,
	abstract = "Current institutional repository software provides few tools to help metadata librarians understand and analyse their collections. In this paper we compare and contrast metadata analysis tools that were developed simultaneously, but independently, at two New Zealand institutions during a period of national investment in research repositories: the Metadata Analysis Tool (MAT) at The University of Waikato, and the Kiwi Research Information Service (KRIS) at the National Library of New Zealand. The tools have many similarities: they are convenient, online, on-demand services that harvest metadata using OAI-PMH, they were developed in response to feedback from repository administrators, and they both help pinpoint specific metadata errors as well as generating summary statistics. They also have significant differences: one is a dedicated tool while the other is part of a wider access tool; one gives a holistic view of the metadata while the other looks for specific problems; one seeks patterns in the data values while the other checks that those values conform to metadata standards. Both tools work in a complementary manner to existing web-based administration tools. We have observed that discovery and correction of metadata errors can be quickly achieved by switching web browser views from the analysis tool to the repository interface, and back. We summarise the findings from both tools' deployment into a checklist of requirements for metadata analysis tools.",
	author = "David M. Nichols and Gordon W. Paynter and Chu-Hsiang Chan and David Bainbridge and Dana McKay and Michael B. Twidale and Ann Blandford",
	citeulike-article-id = "3470246",
	institution = "Department of Computer Science, University of Waikato",
	keywords = "metadata",
	month = aug,
	posted-at = "2009-06-30 12:33:16",
	priority = "2",
	title = "Metadata tools for institutional repositories",
	type = "Working Paper",
	url = "http://eprints.rclis.org/archive/00014732/",
	year = "2008",
	file = "{:references:dspace.metadata.quality.canada:oai\_ir.pdf|}"
}

@Misc{ citeulike:4753116,
	author = "Xavier Ochoa and Erik Duval",
	citeulike-article-id = "4753116",
	keywords = "metadata, quality",
	posted-at = "2009-06-05 15:51:21",
	priority = "0",
	title = "Towards Automatic Evaluation of Metadata Quality in Digital Repositories",
	url = "http://ariadne.cti.espol.edu.ec/M4M/files/TowardsAutomaticQuality.pdf",
	year = "2005"
}

@Article{ citeulike:5497773,
	abstract = "Owing to the recent developments in automatic metadata generation and interoperability between digital repositories, the production of metadata is now vastly surpassing manual quality control capabilities. Abandoning quality control altogether is problematic, because low-quality metadata compromise the effectiveness of services that repositories provide to their users. To address this problem, we present a set of scalable quality metrics for metadata based on the Bruce \& Hillman framework for metadata quality control. We perform three experiments to evaluate our metrics: (1) the degree of correlation between the metrics and manual quality reviews, (2) the discriminatory power between metadata sets and (3) the usefulness of the metrics as low-quality filters. Through statistical analysis, we found that several metrics, especially Text Information Content, correlate well with human evaluation and that the average of all the metrics are roughly as effective as people to flag low-quality instances. The implications of this finding are discussed. Finally, we propose possible applications of the metrics to improve tools for the administration of digital repositories.",
	author = "Xavier Ochoa and Erik Duval",
	citeulike-article-id = "5497773",
	citeulike-linkout-0 = "http://dx.doi.org/10.1007/s00799-009-0054-4",
	citeulike-linkout-1 = "http://www.springerlink.com/content/0830860110v30832",
	doi = "10.1007/s00799-009-0054-4",
	journal = "International Journal on Digital Libraries",
	keywords = "metadata\_quality, metrics",
	month = aug,
	number = "2--3",
	pages = "67--91",
	posted-at = "2010-01-07 05:27:19",
	priority = "0",
	title = "Automatic evaluation of metadata quality in digital repositories",
	url = "http://dx.doi.org/10.1007/s00799-009-0054-4",
	volume = "10",
	year = "2009",
	file = "{:references:metadata.quality:ochoa.2009.ijdl.automatic\_evaluation\_metadata\_quality.pdf|}"
}

@inproceedings{citeulike:3833131,
  author               = {Oldenburg, Steffen and Garbe, Martin and Cap, Clemens},
  title                = {Similarity cross-analysis of tag / co-tag spaces in social classification systems},
  booktitle            = {SSM '08: Proceeding of the 2008 ACM workshop on Search in social media},
  pages                = {11--18},
  year                 = {2008},
  publisher            = {ACM},
  address              = {New York, NY, USA},
  location             = {Napa Valley, California, USA},
  isbn                 = {9781605582580},
  doi                  = {10.1145/1458583.1458587},
  url                  = {http://dx.doi.org/10.1145/1458583.1458587},
  keywords             = {similarity\_analysis, social\_classification},
  citeulike-article-id = {3833131},
  posted-at            = {2009-06-30 15:58:50},
  priority             = {2}
}

@Article{ citeulike:6854365,
	abstract = "In current library practice, trained human experts usually carry out document cataloguing and indexing based on a manual approach. With the explosive growth in the number of electronic documents available on the Internet and digital libraries, it is increasingly difficult for library practitioners to categorize both electronic documents and traditional library materials using just a manual approach. To improve the effectiveness and efficiency of document categorization at the library setting, more in-depth studies of using automatic document classification methods to categorize library items are required. Machine learning research has advanced rapidly in recent years. However, applying machine learning techniques to improve library practice is still a relatively unexplored area. This paper illustrates the design and development of a machine learning based automatic document classification system to alleviate the manual categorization problem encountered within the library setting. Two supervised machine learning algorithms have been tested. Our empirical tests show that supervised machine learning algorithms in general, and the k-nearest neighbours (KNN) algorithm in particular, can be used to develop an effective document classification system to enhance current library practice. Moreover, some concrete recommendations regarding how to practically apply the KNN algorithm to develop automatic document classification in a library setting are made. To our best knowledge, this is the first in-depth study of applying the KNN algorithm to automatic document classification based on the widely used LCC classification scheme adopted by many large libraries.",
	author = "Joanna Y. Pong and Ron C. Kwok and Raymond Y. Lau and Jin-Xing Hao and Percy C. Wong",
	citeulike-article-id = "6854365",
	citeulike-linkout-0 = "http://dx.doi.org/10.1177/0165551507082592",
	citeulike-linkout-1 = "http://jis.sagepub.com/cgi/content/abstract/34/2/213",
	day = "1",
	doi = "10.1177/0165551507082592",
	journal = "Journal of Information Science",
	month = apr,
	number = "2",
	pages = "213--230",
	posted-at = "2010-03-15 20:21:39",
	priority = "2",
	title = "A comparative study of two automatic document classification methods in a library setting",
	url = "http://dx.doi.org/10.1177/0165551507082592",
	volume = "34",
	year = "2008"
}

@Article{ citeulike:6544583,
	abstract = "This study examines Dublin Core (DC) metadata semantics drawn from the perspectives and experiences of cataloguing and metadata professionals. The study ascertains the extent of difficulty in applying the DC metadata elements encountered by these professionals and examines factors engendering such difficulties during the metadata application process. Comments drawn from the survey participants (n = 141) show that conceptual ambiguities (41\%) and semantic overlaps (45\%) of the surveyed DC metadata elements are the most frequently cited factors causing difficulty and confusion, in turn leading to variant interpretations of DC metadata elements. This has the potential to bring forth inconsistent and inaccurate applications and implementation of the DC standard across institutions which can directly affect semantic interoperability across digital repositories. The high degree of difficulty (55.3\%) engendered by the Relation field indicates that further examination of this element is needed.",
	author = "Jung-ran Park and Eric Childress",
	citeulike-article-id = "6544583",
	citeulike-linkout-0 = "http://dx.doi.org/10.1177/0165551509337871",
	citeulike-linkout-1 = "http://jis.sagepub.com/cgi/content/abstract/35/6/727",
	day = "1",
	doi = "10.1177/0165551509337871",
	journal = "Journal of Information Science",
	keywords = "metadata, semantic",
	month = dec,
	number = "6",
	pages = "727--739",
	posted-at = "2010-01-15 16:52:46",
	priority = "0",
	title = "{Dublin Core} metadata semantics: an analysis of the perspectives of information professionals",
	url = "http://dx.doi.org/10.1177/0165551509337871",
	volume = "35",
	year = "2009",
	file = "{:references:metadata.semantics:park.2009.jis.dc\_metadata\_semantics.pdf|}"
}

@Misc{ citeulike:1453509,
	abstract = "RDF is a directed, labeled graph data format for representing information in the Web. This specification defines the syntax and semantics of the SPARQL query language for RDF. SPARQL can be used to express queries across diverse data sources, whether the data is stored natively as RDF or viewed as RDF via middleware. SPARQL contains capabilities for querying required and optional graph patterns along with their conjunctions and disjunctions. SPARQL also supports extensible value testing and constraining queries by source RDF graph. The results of SPARQL queries can be results sets or RDF graphs.",
	author = "Eric Prud'hommeaux and Andy Seaborne",
	citeulike-article-id = "1453509",
	citeulike-linkout-0 = "http://www.w3.org/TR/rdf-sparql-query/",
	howpublished = "W3C Recommendation",
	institution = "World Wide Web Consortium",
	month = jan,
	posted-at = "2010-01-15 14:21:27",
	priority = "0",
	title = "{SPARQL} Query Language for {RDF}",
	url = "http://www.w3.org/TR/rdf-sparql-query/",
	year = "2008"
}

@Misc{ citeulike:379834,
	abstract = "The growing infrastructure for Web Services assumes a ``programmer in the loop'' that hardcodes the connections between Web Services and directly programs Web Service composition. Emerging technology based on DAML-S and the Semantic Web allows Web Services to connect and transact automatically with minimal or no intervention from programmers. In this paper we discuss the problems related with autonomous Web Services, and how DAML-S provides the information to solve them. Furthermore, we describe...",
	author = "M. Paolucci and K. Sycara and T. Kawamura",
	citeulike-article-id = "379834",
	keywords = "semantic\_web, web\_services",
	posted-at = "2009-06-30 16:19:43",
	priority = "2",
	title = "Delivering Semantic Web Services",
	url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.14.4382",
	year = "2002",
	file = "{:references:001:deliver.semantic.web.services.pdf|}"
}

@Article{ citeulike:5800826,
	author = "Eun G. Park",
	citeulike-article-id = "5800826",
	citeulike-linkout-0 = "http://dx.doi.org/10.1108/02640470710741331",
	doi = "10.1108/02640470710741331",
	issn = "0264-0473",
	journal = "The Electronic Library",
	keywords = "metadata, metadata\_assessment",
	number = "2",
	pages = "207--218",
	posted-at = "2009-09-18 00:56:42",
	priority = "0",
	publisher = "Emerald Group Publishing Limited",
	title = "Building interoperable {Canadian} architecture collections: initial metadata assessment",
	url = "http://dx.doi.org/10.1108/02640470710741331",
	volume = "25",
	year = "2007",
	file = "{:references:archive.ontology:building.architecture.collections.pdf|}"
}

@Article{ citeulike:1209626,
	author = "Eun G. Park and Qing Zou and David McKnight",
	citeulike-article-id = "1209626",
	doi = "10.1108/00330330710724917",
	issn = "0033-0337",
	journal = "Program: electronic library \& information systems",
	keywords = "etd",
	number = "1",
	pages = "81--91",
	posted-at = "2008-06-01 21:32:53",
	priority = "0",
	publisher = "Emerald Group Publishing Limited",
	title = "Electronic thesis initiative: pilot project of {McGill University}, {Montreal}",
	url = "http://dx.doi.org/10.1108/00330330710724917",
	volume = "41",
	year = "2007",
	file = "{:references:my\_published\_articles:2007.etd\_mcgill\_emerald\_program.pdf|}"
}

@Article{ citeulike:5784891,
	author = "Jung-ran Park and Susan Maszaros",
	citeulike-article-id = "5784891",
	journal = "Knowledge Organization",
	keywords = "metadata, metadata\_quality",
	number = "1",
	pages = "46--59",
	posted-at = "2009-09-15 01:53:34",
	priority = "0",
	title = "{Metadata Object Description Schema} ({MODS}) in Digital Repositories: An Exploratory Study of Metadata Use and Quality",
	volume = "36",
	year = "2009"
}

@article{citeulike:5792203,
  author               = {Park, Jung-ran},
  title                = {Semantic Interoperability and Metadata Quality: An Analysis of Metadata Item Records of Digital Image Collections},
  journal              = {Knowledge Organization},
  volume               = {33},
  number               = {1},
  pages                = {20--34},
  year                 = {2006},
  keywords             = {digital\_image\_collections, metadata\_quality, semantic\_interoperability},
  citeulike-article-id = {5792203},
  posted-at            = {2009-09-16 17:03:03},
  priority             = {3}
}

@Book{ citeulike:108696,
	abstract = "{\emph{Practical RDF} explains RDF from the ground up, providing real-world examples and descriptions of how the technology is being used in applications like Mozilla, FOAF, and Chandler, as well as infrastructure you can use to build your own applications. This book cuts to the heart of the W3C's often obscure specifications, giving you tools to apply RDF successfully in your own projects. The first part of the book focuses on the RDF specifications. After an introduction to RDF, the book covers the RDF specification documents themselves, including RDF Semantics and Concepts and Abstract Model specifications, RDF constructs, and the RDF Schema. The second section focuses on programming language support, and the tools and utilities that allow developers to review, edit, parse, store, and manipulate RDF/XML. Subsequent sections focus on RDF's data roots, programming and framework support, and practical implementation and use of RDF and RDF/XML. If you want to know how to apply RDF to information processing, \emph{Practical RDF} is for you. Whether your interests lie in large-scale information aggregation and analysis or in smaller-scale projects like weblog syndication, this book will provide you with a solid foundation for working with RDF.}",
	author = "Shelley Powers",
	citeulike-article-id = "108696",
	howpublished = "Paperback",
	isbn = "0596002637",
	keywords = "rdf",
	month = aug,
	posted-at = "2009-06-30 20:12:47",
	priority = "0",
	publisher = "O'Reilly",
	title = "Practical {RDF}",
	url = "http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/0596002637",
	year = "2003"
}

@Article{ citeulike:4981874,
	abstract = "Describes a study that was conducted to determine how well subject authority lists, or thesauri, control indexing vocabulary. Indexer consistency using the Commonwealth Agricultural Bureaux (CAB) thesaurus was tested by comparing indexing done by CAB and by National Agricultural Library (NAL) indexers. (six references) (LRW)",
	author = "Phyllis Reich and Erik J. Biever",
	citeulike-article-id = "4981874",
	citeulike-linkout-0 = "http://www.eric.ed.gov/ERICWebPortal/detail?accno=EJ430272",
	journal = "College and Research Libraries",
	keywords = "indexerconsistency, indexing, paper1, thesaurus, thesis",
	number = "4",
	pages = "336--342",
	posted-at = "2009-06-26 20:57:47",
	priority = "0",
	title = "Indexing Consistency: The Input/Output Function of Thesauri",
	url = "http://www.eric.ed.gov/ERICWebPortal/detail?accno=EJ430272",
	volume = "52",
	year = "1991"
}

@Article{ citeulike:3613626,
	abstract = "Many scholarly writings today are available in electronic formats. With universities around the world choosing to make digital versions of their dissertations, theses, project reports, and related files and data sets available online, an overwhelming amount of information is becoming available on almost any particular topic. How will users decide which dissertation, or subsection of a dissertation, to read to get the required information on a particular topic? What kind of services can such digital libraries provide to make knowledge discovery easier? In this paper, we investigate these issues, using as a case study the Networked Digital Library of Theses and Dissertations (NDLTD), a rapidly growing collection that already has about 800,000 Electronic Theses and Dissertations (ETDs) from universities around the world. We propose the design for a scalable, Web Services based tool KDWebS (Knowledge Discovery System based on Web Services), to facilitate automated knowledge discovery in NDLTD. We also provide some preliminary proof of concept results to demonstrate the efficacy of the approach.",
	author = "W. Richardson and Venkat Srinivasan and Edward Fox",
	citeulike-article-id = "3613626",
	citeulike-linkout-0 = "http://dx.doi.org/10.1007/s00799-008-0046-9",
	citeulike-linkout-1 = "http://www.springerlink.com/content/w3182840w7j17117",
	day = "1",
	doi = "10.1007/s00799-008-0046-9",
	journal = "International Journal on Digital Libraries",
	month = nov,
	number = "2",
	pages = "163--171",
	posted-at = "2010-01-15 20:50:28",
	priority = "2",
	title = "Knowledge discovery in digital libraries of electronic theses and dissertations: an {NDLTD} case study",
	url = "http://dx.doi.org/10.1007/s00799-008-0046-9",
	volume = "9",
	year = "2008"
}

@Article{ citeulike:2173931,
	abstract = "Latent semantic analysis has been used for several years to improve the performance of document library searches. We show that latent semantic analysis, augmented with a Part-of-Speech Tagger, may be an effective algorithm for classifying a textual document as well. Using Brille's Part-of-Speech Tagger, we truncate the singular value decomposition used in latent semantic analysis to reduce the size of the word-frequency matrix. This method is then tested on a toy problem, and has shown to increase search accuracy. We then relate these results to natural language processing and show that latent semantic analysis can be combined with context free grammars to infer semantic meaning from natural language. English is the natural language currently being used.",
	affiliation = "Computer Science Dept., University of Southern Mississippi, 730 East Beach Blvd, Long Beach, MS 39560",
	author = "Tom Rishel and Louise A. Perkins and Sumanth Yenduri and Farnaz Zand",
	citeulike-article-id = "2173931",
	doi = "10.1002/asi.20687",
	journal = "Journal of the American Society for Information Science and Technology",
	keywords = "latent\_sementic",
	number = "14",
	pages = "2197--2204",
	posted-at = "2009-06-30 06:56:53",
	priority = "2",
	title = "Determining the context of text using augmented latent semantic indexing",
	url = "http://dx.doi.org/10.1002/asi.20687",
	volume = "58",
	year = "2007",
	file = "{:references:metadata.autogen:autogen04.pdf|}"
}

@Article{ citeulike:222603,
	author = "R. John Robertson",
	citeulike-article-id = "222603",
	doi = "10.1108/00242530510600543",
	issn = "0024-2535",
	journal = "Library Review",
	keywords = "metadata, metadata\_quality",
	month = may,
	number = "5",
	pages = "295--300",
	posted-at = "2009-07-01 20:58:28",
	priority = "3",
	publisher = "Emerald Group Publishing Limited",
	title = "Metadata quality: implications for library and information science professionals",
	url = "http://dx.doi.org/10.1108/00242530510600543",
	volume = "54",
	year = "2005",
	file = "{:references:metadata.quality:robertson.2005.metadata.quality.implications.pdf|}"
}

@Misc{ citeulike:2968250,
	abstract = "In spite of its tremendous value, metadata is generally sparse and incomplete, thereby hampering the effectiveness of digital information services. Many of the existing mechanisms for the automated creation of metadata rely primarily on content analysis which can be costly and inefficient. The automatic metadata generation system proposed in this article leverages resource relationships generated from existing metadata as a medium for propagation from metadata-rich to metadata-poor resources. Because of its independence from content analysis, it can be applied to a wide variety of resource media types and is shown to be computationally inexpensive. The proposed method operates through two distinct phases. Occurrence and co-occurrence algorithms first generate an associative network of repository resources leveraging existing repository metadata. Second, using the associative network as a substrate, metadata associated with metadata-rich resources is propagated to metadata-poor resources by means of a discrete-form spreading activation algorithm. This article discusses the general framework for building associative networks, an algorithm for disseminating metadata through such networks, and the results of an experiment and validation of the proposed method using a standard bibliographic dataset.",
	archivePrefix = "arXiv",
	author = "Marko A. Rodriguez and Johan Bollen and Herbert {Van de Sompel}",
	citeulike-article-id = "2968250",
	eprint = "0807.0023",
	keywords = "generation, metadata",
	month = jun,
	posted-at = "2009-06-30 06:18:58",
	priority = "2",
	title = "Automatic Metadata Generation using Associative Networks",
	url = "http://arxiv.org/abs/0807.0023",
	year = "2008",
	file = "{:references:metadata.autogen:future.metadata.autogen.pdf|}"
}

@Article{ citeulike:6548831,
	abstract = "Purpose -- To report the results of the 2005 {CARL} survey of institutional repositories {(IRs)} and discuss some of the challenges of implementing {IRs} in Canada. Design/methodology/approach -- This takes the form of a survey questionnaire. Findings -- There are a number of important issues confronting implementers of institutional repositories. Most of these issues are not insurmountable, but, to be properly addressed will require collaboration amongst implementers and resources. Research limitations/implications -- The findings issues identified through the survey contribute to the workplan of the {CARL} Institutional Repositories Project. Originality/value -- The paper presents an up to date account of the state of institutional repositories in Canada.",
	author = "Kathleen Shearer",
	citeulike-article-id = "6548831",
	citeulike-linkout-0 = "http://dx.doi.org/10.1108/07378830610669547",
	citeulike-linkout-1 = "http://www.emeraldinsight.com/10.1108/07378830610669547",
	doi = "10.1108/07378830610669547",
	journal = "Library Hi Tech",
	keywords = "canada, digital, file-import-10-01-16, libraries",
	number = "2",
	pages = "165--172",
	posted-at = "2010-01-16 01:33:22",
	priority = "2",
	title = "The {CARL} institutional repositories project: A collaborative approach to addressing the challenges of {IRs} in {Canada}",
	url = "http://dx.doi.org/10.1108/07378830610669547",
	volume = "24",
	year = "2006"
}

@Article{ citeulike:6545140,
	abstract = "The objectives of the study were to conduct a series of observations and experiments under as real-life a situation as possible related to: (1) user context of questions in information retrieval; (2) the structure and classification of questions; (3) cognitive traits and decision making of searchers; and (4) different searches of the same question. The study is presented in three parts: Part I presents the background of the study and describes the models, measures, methods, procedures and statistical analyses used. Part II is devoted to results related to users, questions and effectiveness measures, and Part III to results related to searchers, searches and overlap studies. A concluding summary of all results is presented in Part III. Summary of the Study This is a second article in a series of three, reporting on a study of information seeking and retrieving. The first dealt with the methodological aspects describing the aim, objectives and approach, related works, and models, measures and procedures used, including references appropriate for the study as a whole [1]. This second part concentrates on results connected with users, questions, and effectiveness measures. The third part concentrates on results connected with searchers, searches, and overlap studies. A Final Report together with appendices was deposited with ERIC and NTIS [2]; it contains the details of the study with emphasis on procedures and presentation of ``raw'' data. A summary of methods used in the study is provided here, so that a reader",
	author = "Tefko Saracevic and Paul Kantor",
	citeulike-article-id = "6545140",
	citeulike-linkout-0 = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.119.6060",
	journal = "Journal of the American Society for Information Science",
	number = "3",
	pages = "177--196",
	posted-at = "2010-01-15 20:09:19",
	priority = "2",
	title = "A study of information seeking and retrieving. {II}. Users, questions, and effectiveness",
	url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.119.6060",
	volume = "39",
	year = "1988"
}

@Article{ citeulike:6545138,
	abstract = "The objectives of the study were to conduct a series of observations and experiments under as real-life a situation as possible related to: (i) user context of questions in information retrieval; (ii) the structure and classification of questions; (iii) cognitive traits and decision making of searchers; and (iv) different searches of the same question. The study is presented in three parts: Part I presents the background of the study and describes the models, measures, methods, procedures, and statistical analyses used. Part II is devoted to results related to users, questions, and effectiveness measures, and Part III to results related to searchers, searches, and overlap studies. A concluding summary of all results is presented in Part III. Introduction Problem, Motivation, Significance Users and their questions are fundamental to all kinds of information systems, and human decisions and human-system interactions are by far the most important variables in processes dealing with searching for and retrieval of information. These statements are true to the point of being trite. Nevertheless, it is nothing but short of amazing how relatively little knowledge and understanding in a scientific sense we have about these factors. Information retrieval",
	author = "Tefko Saracevic and Paul Kantor and Alice Y. Chamis and Donna Trivison",
	citeulike-article-id = "6545138",
	citeulike-linkout-0 = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.94.4923",
	journal = "Journal of the American Society for Information Science",
	number = "3",
	pages = "161--176",
	posted-at = "2010-01-15 20:08:07",
	priority = "2",
	title = "A study of information seeking and retrieving. {I}. Background and methodology",
	url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.94.4923",
	volume = "39",
	year = "1988"
}

@Article{ citeulike:6545132,
	abstract = "The objectives of the study were to conduct a series of observations and experiments under as real-life a situation as possible related to: (1) user context of questions in information retrieval; (2) the structure and classification of questions; (3) cognitive traits and decision making of searchers; and (4) different searches of the same question. The study is presented in three parts: Part I presents the background of the study and describes the models, measures, methods, procedures and statistical analyses used. Part II is devoted to results related to users, questions and effectiveness measures, and Part III to results related to searchers, searches and overlap studies. A concluding summary of all results is presented in Part III.",
	author = "Tefko Saracevic and Paul Kantor",
	citeulike-article-id = "6545132",
	citeulike-linkout-0 = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.87.5184",
	journal = "Journal of the American Society for Information Science",
	number = "3",
	pages = "197--216",
	posted-at = "2010-01-15 20:06:12",
	priority = "2",
	title = "A study of information seeking and retrieving. {III}. Searchers, searches, and overlap",
	url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.87.5184",
	volume = "39",
	year = "1988"
}

@Article{ citeulike:6544607,
	address = "New York, NY, USA",
	author = "Alenka {\v{S}}auperl and Janko Klasinc and Simona Lu{\v{z}}ar",
	citeulike-article-id = "6544607",
	citeulike-linkout-0 = "http://dx.doi.org/10.1002/asi.v59:9",
	doi = "10.1002/asi.v59:9",
	journal = "Journal of the American Society for Information Science and Technology",
	number = "9",
	pages = "1420--1432",
	posted-at = "2010-01-15 17:07:04",
	priority = "2",
	publisher = "John Wiley \& Sons, Inc.",
	title = "Components of abstracts: Logical structure of scholarly abstracts in pharmacology, sociology, and linguistics and literature",
	url = "http://dx.doi.org/10.1002/asi.v59:9",
	volume = "59",
	year = "2008"
}

@article{citeulike:5721593,
	author = {Besiki Stvilia and Lorri Mon and Yong J. Yi},
	title = {A model for online consumer health information quality},
	journal = {Journal of the American Society for Information Science and Technology},
	volume = {60},
	number = {9},
	pages = {1781--1791},
	month = {September},
	year = {2009},
	issn = {15322882},
	doi = {10.1002/asi.21115},
	url = {http://dx.doi.org/10.1002/asi.21115},
	abstract = {This article describes a model for online consumer health information consisting of five quality criteria constructs. These constructs are grounded in empirical data from the perspectives of the three main sources in the communication process: health information providers, consumers, and intermediaries, such as Web directory creators and librarians, who assist consumers in finding healthcare information. The article also defines five constructs of Web page structural markers that could be used in information quality evaluation and maps these markers to the quality criteria. Findings from correlation analysis and multinomial logistic tests indicate that use of the structural markers depended significantly on the type of Web page and type of information provider. The findings suggest the need to define genre-specific templates for quality evaluation and the need to develop models for an automatic genre-based classification of health information Web pages. In addition, the study showed that consumers may lack the motivation or literacy skills to evaluate the information quality of health Web pages, which suggests the need to develop accessible automatic information quality evaluation tools and ontologies.},
	citeulike-article-id = {5721593},
	citeulike-linkout-0 = {http://dx.doi.org/10.1002/asi.21115},
	posted-at = {2010-01-15 14:32:58},
	priority = {2}
}

@Article{ citeulike:4149292,
	abstract = "Key words: scholarly discourse -- scientific publishing -- ontologies -- knowledge-based systems -- argumentation -- visualization -- eprint servers -- internet digital libraries. Abstract. The internet is rapidly becoming the first place for researchers to publish documents, but at present they receive little support in searching, tracking, analyzing or debating concepts in a literature from scholarly perspectives. This paper describes the design rationale and implementation of ScholOnto, an ontology-based digital library server to support scholarly interpretation and discourse. It enables researchers to describe and debate via a semantic network the contributions a document makes, and its relationship to the literature. The paper discusses the computational services that an ontology-based server supports, alternative user interfaces to support interaction with a large semantic network, usability issues associated with knowledge formalization, new work practices that could emerge, and related work.",
	author = "Simon {Buckingham Shum} and Enrico Motta and John Domingue",
	citeulike-article-id = "4149292",
	citeulike-linkout-0 = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.36.3835",
	journal = "International Journal on Digital Libraries",
	pages = "237--248",
	posted-at = "2010-01-15 14:23:09",
	priority = "2",
	title = "{ScholOnto}: an Ontology-Based Digital Library Server for Research Documents and Discourse",
	url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.36.3835",
	volume = "3",
	year = "2000"
}

@Article{ citeulike:935556,
	abstract = "This essay is a personal analysis of information science as a field of scientific inquiry and professional practice that has evolved over the past half-century. Various sections examine the origin of information science in respect to the problems of information explosion; the social role of the field; the nature of ``information'' in information science; the structure of the field in terms of problems addressed; evolutionary trends in information retrieval as a major branch of information science; the relation of information science to other fields, most notably librarianship and computer science; and educational models and issues. Conclusions explore some dominant trends affecting the field.",
	address = "School of Communication, Information and Library Studies, Rutgers University, 4 Huntington Street, New Brunswick, NJ 08903",
	author = "Tefko Saracevic",
	citeulike-article-id = "935556",
	citeulike-linkout-0 = "http://dx.doi.org/10.1002/(SICI)1097-4571(1999)50:12%3C1051::AID-ASI2%3E3.0.CO;2-Z",
	citeulike-linkout-1 = "http://www3.interscience.wiley.com/cgi-bin/abstract/69500811/ABSTRACT",
	doi = "10.1002/(SICI)1097-4571(1999)50:12<1051::AID-ASI2>3.0.CO;2-Z",
	journal = "Journal of the American Society for Information Science",
	keywords = "information\_science",
	number = "12",
	pages = "1051--1063",
	posted-at = "2009-09-07 02:22:51",
	priority = "0",
	review-note = "NOTE(review): address holds what looks like the author's affiliation, not a publisher city -- confirm before relying on it",
	title = "Information science",
	url = "http://dx.doi.org/10.1002/(SICI)1097-4571(1999)50:12%3C1051::AID-ASI2%3E3.0.CO;2-Z",
	volume = "50",
	year = "1999",
	file = "{:references:phd1st:glis702:saracevic\_information\_science\_jasis1999.pdf|}"
}

@Article{ citeulike:559522,
	abstract = {The semantic Web vision of a ``unifying logical language that enables concepts to be progressively linked into a universal Web'' is part of a long lineage of dreams of a universal repository of ideas: from Diderot's universal encyclopedia in the 18th century to Vannevar Bush's Memex at the beginning of the computer age to Ted Nelson's Xanadu in the 1970s. However, the semantic Web's development so far has focused primarily on metadata and carefully designed data structures. To realize Berners-Lee's vision, the semantic Web must capture and represent content created every day by people without special training - such content includes blogs, emails, and discussion groups. Rhizome is an experimental, open source content management framework the author has created that can capture and represent informal, human-authored content in a semantically rich manner. Rhizome aims to help bring about a new kind of commons - one of ideas. This commons wouldn't comprise just a web of interlinked pages of content, as is the current World Wide Web, but a web of relationships between the underlying ideas and distinctions that the content implies: a permanent, universally accessible interlinking of content based on imputed semantics such as concepts, definitions, or structured argumentation.},
	author = "A. Souzis",
	citeulike-article-id = "559522",
	journal = "{IEEE} Intelligent Systems",
	keywords = "semantic\_web, semantic\_wiki",
	number = "5",
	pages = "87--91",
	posted-at = "2009-06-30 16:02:56",
	priority = "2",
	title = "Building a semantic wiki",
	url = "http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1512004",
	volume = "20",
	year = "2005",
	file = "{:references:001:building\_semantic\_wiki.pdf|}"
}

@Article{ citeulike:251579,
	abstract = "Hypothesis generation, a crucial initial step for making scientific discoveries, relies on prior knowledge, experience, and intuition. Chance connections made between seemingly distinct subareas sometimes turn out to be fruitful. The goal in text mining is to assist in this process by automatically discovering a small set of interesting hypotheses from a suitable text collection. In this report, we present open and closed text mining algorithms that are built within the discovery framework established by Swanson and Smalheiser. Our algorithms represent topics using metadata profiles. When applied to MEDLINE, these are MeSH based profiles. We present experiments that demonstrate the effectiveness of our algorithms. Specifically, our algorithms successfully generate ranked term lists where the key terms representing novel relationships between topics are ranked high.",
	author = "Padmini Srinivasan",
	citeulike-article-id = "251579",
	doi = "10.1002/asi.10389",
	issn = "1532-2890",
	journal = "Journal of the American Society for Information Science and Technology",
	keywords = "text\_mining",
	month = "December",
	number = "5",
	pages = "396--413",
	posted-at = "2009-06-30 06:52:36",
	priority = "2",
	review-note = "NOTE(review): year/month may be the online-first date; volume 55 number 5 was presumably issued in 2004 -- TODO confirm",
	title = "Text mining: Generating hypotheses from {MEDLINE}",
	url = "http://dx.doi.org/10.1002/asi.10389",
	volume = "55",
	year = "2003",
	file = "{:references:metadata.autogen:autogen03.pdf|}"
}

@Article{ citeulike:4746642,
	abstract = "With the advent and accessibility of the Internet, artistic and indigenous communities are beginning to realize how digital technologies can be used as a means for documenting and preserving their histories and cultures. However, it is not yet clear what knowledge architectures are most appropriate for creating a digital museum in order to facilitate an effective collection, organization, conservation, and experience of cultural and artistic heritage. In this paper, we discuss the concept of ``fluid ontologies,'' a novel, dynamic structure for organizing and browsing knowledge in a digital museum. Fluid ontologies are flexible knowledge structures that evolve and adapt to communities' interest based on contextual information articulated by human contributors, curators, and viewers, as well as artificial bots that are able to track interaction histories and infer relationships among knowledge pieces and preferences of viewers. Fluid ontologies allow for a tighter coupling between communities' interests and the browsing structure of a digital museum. We present the key ideas behind the use of fluid ontologies within the context of digital museum design and seminal work in metadata/dynamic ontologies, particularly as it pertains to objects of cultural heritage, and discuss these characteristics in three concrete examples: (1) Village Voice, an online agora that ties together the narratives created by a group of Somali refugees using an iteration of community-designed ontologies, (2) Eventspace, a node-based collaborative archive for design activities, and (3) Tribal Peace, an online digital museum still under construction and evaluation that uses proactive agents to tie distributed Kumeyaay, Luiseno, and Cupeno reservations together in their quest to achieve greater political sovereignty.",
	author = "Ramesh Srinivasan and Jeffrey Huang",
	citeulike-article-id = "4746642",
	doi = "10.1007/s00799-004-0105-9",
	journal = "International Journal on Digital Libraries",
	keywords = "digital\_museum, information\_visualization, ontology",
	month = "May",
	number = "3",
	pages = "193--204",
	posted-at = "2009-06-04 21:08:17",
	priority = "3",
	title = "Fluid ontologies for digital museums",
	url = "http://dx.doi.org/10.1007/s00799-004-0105-9",
	volume = "5",
	year = "2005",
	file = "{:references:archive.ontology:srinivasan-r\_2005\_int\_j-digital-lib.pdf|}"
}

@article{citeulike:2594234,
	author = {Besiki Stvilia and Les Gasser},
	title = {Value-based metadata quality assessment},
	journal = {Library \& Information Science Research},
	volume = {30},
	number = {1},
	pages = {67--74},
	month = {March},
	year = {2008},
	doi = {10.1016/j.lisr.2007.06.006},
	url = {http://dx.doi.org/10.1016/j.lisr.2007.06.006},
	keywords = {metadata, quality},
	abstract = {This article proposes a method that allows a value-based assessment of metadata quality and construction of a baseline quality model. The method is illustrated on a large-scale, aggregated collection of simple Dublin core metadata records. An analysis of the collection suggests that metadata providers and end users may have different value structures for the same metadata. To promote better use of the metadata collection, value models for metadata in the collection should be made transparent to end users and end users should be allowed to participate in content creation and quality control processes.},
	citeulike-article-id = {2594234},
	posted-at = {2009-06-05 14:41:36},
	priority = {2},
	file = {{:references:metadata.quality:stvilia.2008.lisr.value\_based\_metadata.quality.pdf|}}
}

@Article{ citeulike:2359494,
	address = "New York, NY, USA",
	author = "Besiki Stvilia and Les Gasser and Michael B. Twidale and Linda C. Smith",
	citeulike-article-id = "2359494",
	doi = "10.1002/asi.v58:12",
	issn = "1532-2882",
	journal = "Journal of the American Society for Information Science and Technology",
	keywords = "assessment, metadata\_quality",
	month = "October",
	number = "12",
	pages = "1720--1733",
	posted-at = "2009-07-01 21:13:55",
	priority = "3",
	publisher = "John Wiley \& Sons, Inc.",
	review-note = "NOTE(review): doi looks like an issue-level identifier (v58:12) rather than an article DOI -- confirm and replace",
	title = "A framework for information quality assessment",
	url = "http://dx.doi.org/10.1002/asi.v58:12",
	volume = "58",
	year = "2007",
	file = "{:references:metadata.quality:metadata\_quality\_assurance\_framework.pdf|}"
}

@InProceedings{ citeulike:2623567,
	abstract = "This paper presents early results from our empirical studies of metadata quality in large corpuses of metadata harvested under Open Archives Initiative (OAI) protocols. Along with some discussion of why and how metadata quality is important, an approach to conceptualizing, measuring, and assessing metadata quality is presented. The approach given in this paper is based on a more general model of information quality (IQ) for many kinds of information beyond just metadata. A key feature of the general model is its ability to condition quality assessments by context of information use, such as the types of activities that use the information, and the typified norms and values of relevant information-using communities. The paper presents a number of statistical characterizations of analyzed samples of metadata from a large corpus built as part of the Institute of Museum and Library Services Digital Collections and Contents (IMLS DCC) project containing OAI-harvested metadata and links these statistical assessments to the quality measures, and interprets them. Finally the paper discusses several approaches to quality improvement for metadata based on the study findings.",
	author = "Besiki Stvilia and Les Gasser and Michael B. Twidale and Sarah L. Shreeves and Timothy W. Cole",
	booktitle = "Proceedings of the 9th International Conference on Information Quality ({ICIQ}-04)",
	citeulike-article-id = "2623567",
	keywords = "cataloging, metadata",
	month = "November",
	organization = "IMLS",
	pages = "111--125",
	posted-at = "2008-04-02 15:34:11",
	priority = "3",
	title = "Metadata Quality for Federated Collections",
	url = "https://www.ideals.uiuc.edu/handle/2142/721",
	year = "2004",
	file = "{:references:metadata.quality:metadata\_quality\_federated\_collections.pdf|}"
}

@PhDThesis{ citeulike:6086656,
	author = "Besiki Stvilia",
	citeulike-article-id = "6086656",
	citeulike-linkout-0 = "http://proquest.umi.com/pqdweb?did=1192183091\&sid=3\&Fmt=2\&clientId=10843\&RQT=309\&VName=PQD",
	institution = "UIUC",
	keywords = "information\_quality, measurement, metadata\_quality",
	posted-at = "2009-11-08 21:28:06",
	priority = "0",
	publisher = "UIUC",
	school = "University of Illinois at Urbana-Champaign",
	title = "Measuring Information Quality",
	url = "http://proquest.umi.com/pqdweb?did=1192183091&sid=3&Fmt=2&clientId=10843&RQT=309&VName=PQD",
	year = "2007",
	file = "{:references:metadata.quality:measuring.information.quality.pdf|}"
}

@Article{ citeulike:6497016,
	author = "Stuart A. Sutton",
	citeulike-article-id = "6497016",
	citeulike-linkout-0 = "http://dx.doi.org/10.1080/01639370802183065",
	doi = "10.1080/01639370802183065",
	journal = "Cataloging \& Classification Quarterly",
	keywords = "metadata\_quality, semantic\_web",
	month = "September",
	number = "1",
	pages = "81--107",
	posted-at = "2010-01-07 05:04:27",
	priority = "0",
	title = "Metadata Quality, Utility and the Semantic Web: The Case of Learning Resources and Achievement Standards",
	url = "http://dx.doi.org/10.1080/01639370802183065",
	volume = "46",
	year = "2008",
	file = "{:references:metadata.quality:sutton.2008.ccq.metadata\_quality\_utility\_semantic\_web.pdf|}"
}

@Article{ citeulike:6544271,
	abstract = "This paper describes a conceptual framework and methodology for managing scheme versioning for the Semantic Web. The first part of the paper introduces the concept of vocabulary encoding schemes, distinguished from metadata schemas, and discusses the characteristics of changes in schemes. The paper then presents a proposal to use a value record---similar to a term record in thesaurus management techniques---to manage scheme versioning challenges for the Semantic Web. The conclusion identifies future research directions.",
	author = "Joseph T. Tennis",
	citeulike-article-id = "6544271",
	citeulike-linkout-0 = "http://dx.doi.org/10.1300/J104v43n03\_05",
	doi = "10.1300/J104v43n03_05",
	journal = "Cataloging \& Classification Quarterly",
	number = "3",
	pages = "85--104",
	posted-at = "2010-01-15 14:31:16",
	priority = "2",
	publisher = "Routledge",
	title = "Scheme Versioning in the Semantic Web",
	url = "http://dx.doi.org/10.1300/J104v43n03_05",
	volume = "43",
	year = "2007"
}

@Article{ citeulike:2968181,
	abstract = "Biomedical literature databases constitute valuable repositories of up to date scientific knowledge. The development of efficient machine learning methods in order to facilitate the organization of these databases and the extraction of novel biomedical knowledge is becoming increasingly important. Several of these methods require the representation of the documents as vectors of variables forming large multivariate datasets. Since the amount of information contained in different datasets is voluminous, an open issue is to combine information gained from various sources to a concise new dataset, which will efficiently represent the corpus of documents. This paper investigates the use of the multivariate statistical approach, called Non-Linear Canonical Correlation Analysis (NLCCA), for exploiting the correlation among the variables of different document representations and describing the documents with only one new dataset. Experiments with document datasets represented by text words, Medical Subject Headings (MeSH) and Gene Ontology (GO) terms showed the effectiveness of NLCCA.",
	address = "Department of Informatics, School of Natural Sciences, Aristotle University of Thessaloniki, 54124 Thessaloniki, Greece. theodos@csd.auth.gr",
	author = "T. Theodosiou and L. Angelis and A. Vakali",
	citeulike-article-id = "2968181",
	doi = "10.1016/j.jbi.2007.06.004",
	issn = "1532-0480",
	journal = "Journal of Biomedical Informatics",
	keywords = "extraction, metadata",
	month = "February",
	number = "1",
	pages = "202--216",
	posted-at = "2009-06-30 11:57:17",
	priority = "2",
	review-note = "NOTE(review): address holds what looks like the author's affiliation and e-mail, not a publisher city -- confirm before relying on it",
	title = "Non-linear correlation of content and metadata information extracted from biomedical article datasets",
	url = "http://dx.doi.org/10.1016/j.jbi.2007.06.004",
	volume = "41",
	year = "2008",
	file = "{:references:metadata.autogen:autogen11.pdf|}"
}

@Article{ citeulike:465841,
	abstract = "In image retrieval, most systems lack user-centred evaluation since they are assessed by some chosen ground truth dataset. The results reported through precision and recall assessed against the ground truth are thought of as being an acceptable surrogate for the judgment of real users. Much current research focuses on automatically assigning keywords to images for enhancing retrieval effectiveness. However, evaluation methods are usually based on system-level assessment, e.g. classification accuracy based on some chosen ground truth dataset. In this paper, we present a qualitative evaluation methodology for automatic image indexing systems. The automatic indexing task is formulated as one of image annotation, or automatic metadata generation for images. The evaluation is composed of two individual methods. First, the automatic indexing annotation results are assessed by human subjects. Second, the subjects are asked to annotate some chosen images as the test set whose annotations are used as ground truth. Then, the system is tested by the test set whose annotation results are judged against the ground truth. Only one of these methods is reported for most systems on which user-centred evaluation are conducted. We believe that both methods need to be considered for full evaluation. We also provide an example evaluation of our system based on this methodology. According to this study, our proposed evaluation methodology is able to provide deeper understanding of the system's performance.",
	author = "Chih-Fong Tsai and Ken McGarry and John Tait",
	citeulike-article-id = "465841",
	doi = "10.1016/j.ipm.2004.11.001",
	journal = "Information Processing \& Management",
	keywords = "automatic\_metadata\_generation, evaluation",
	month = "January",
	number = "1",
	pages = "136--154",
	posted-at = "2009-06-30 07:16:06",
	priority = "2",
	title = "Qualitative evaluation of automatic assignment of keywords to images",
	url = "http://dx.doi.org/10.1016/j.ipm.2004.11.001",
	volume = "42",
	year = "2006",
	file = "{:references:metadata.autogen:autogen06.pdf|}"
}

@inproceedings{hooland_answeringcall_2008,
	author = {Seth {van Hooland} and Yves Bontemps and Seth Kaufman},
	title = {Answering the call for more accountability: applying data profiling to museum metadata},
	booktitle = {Proceedings of the 2008 International Conference on Dublin Core and Metadata Applications},
	pages = {93--103},
	publisher = {Dublin Core Metadata Initiative},
	address = {Berlin, Germany},
	year = {2008},
	url = {http://portal.acm.org/citation.cfm?id=1503418.1503428&coll=GUIDE&dl=GUIDE&CFID=71452716&CFTOKEN=28016179},
	keywords = {collection, data-profiling, file-import-10-01-16, management, metadata, quality, software},
	abstract = {Although the issue of metadata quality is recognized as an important topic within the metadata research community, the cultural heritage sector has been slow to develop methodologies, guidelines and tools for addressing this topic in practice. This paper concentrates on metadata quality specifically within the museum sector and describes the potential of data-profiling techniques for metadata quality evaluation. A case study illustrates the application of a general-purpose data-profiling tool on a large collection of metadata records from an ethnographic collection. After an analysis of the results of the case-study the paper reviews further steps in our research and presents the implementation of a metadata quality tool within an open-source collection management software.},
	citeulike-article-id = {6549325},
	citeulike-linkout-0 = {http://portal.acm.org/citation.cfm?id=1503418.1503428\&\\#38;coll=GUIDE\&\\#38;dl=GUIDE\&\\#38;CFID=71452716\&\\#38;CFTOKEN=28016179},
	posted-at = {2010-01-16 02:40:47},
	priority = {2}
}

@Misc{ citeulike:1377792,
	abstract = "The changing role of the user, that gradually shifts from a passive consumer of information towards a pro-active user that reorganises and manipulates data, has an increasing impact on traditional information retrieval. A multitude of practical and methodic questions rise as popular web-applications such as blogs, RSS and social bookmarking tools allow users to create and share metadata about online resources. This article tackles these issues in the particular domain of visual cultural heritage. Online image databases increasingly offer users possibilities to annotate and comment on images of interest to them. But what is the pertinence of these user contributions? How can their quality be evaluated? Concretely, our article starts with an introduction to the phenomenon of user-generated metadata by presenting the social tagging of cultural heritage images and the practice of publishing users comments. Secondly, a case study presents an analysis of users comments within the image database of the National Archives of the Netherlands. Based on these empirical data, conclusions and generalizations outside our specific case study are formulated.",
	author = "S. {van Hooland}",
	citeulike-article-id = "1377792",
	citeulike-linkout-0 = "http://homepages.ulb.ac.be/~svhoolan/usergeneratedmetadata.pdf",
	month = "September",
	posted-at = "2010-01-16 02:37:32",
	priority = "2",
	title = "From Spectator to Annotator: Possibilities offered by User-Generated Metadata for Digital Cultural Heritage Collections",
	url = "http://homepages.ulb.ac.be/~svhoolan/usergeneratedmetadata.pdf",
	year = "2006"
}

@PhDThesis{ citeulike:6545103,
	author = "Seth {van Hooland}",
	citeulike-article-id = "6545103",
	keywords = "cultural\_heritage, metadata, metadata\_quality",
	posted-at = "2010-01-15 19:51:29",
	priority = "2",
	school = "Universit{\'e} Libre de Bruxelles",
	title = "Metadata Quality in the Cultural Heritage Sector: Stakes, Problems and Solutions",
	year = "2009"
}

@Misc{ citeulike:6081565,
	abstract = "The Web is ephemeral. Many resources have representations that change over time, and many of those representations are lost forever. A lucky few manage to reappear as archived resources that carry their own URIs. For example, some content management systems maintain version pages that reflect a frozen prior state of their changing resources. Archives recurrently crawl the web to obtain the actual representation of resources, and subsequently make those available via special-purpose archived resources. In both cases, the archival copies have URIs that are protocol-wise disconnected from the URI of the resource of which they represent a prior state. Indeed, the lack of temporal capabilities in the most common Web protocol, HTTP, prevents getting to an archived resource on the basis of the URI of its original. This turns accessing archived resources into a significant discovery challenge for both human and software agents, which typically involves following a multitude of links from the original to the archival resource, or of searching archives for the original URI. This paper proposes the protocol-based Memento solution to address this problem, and describes a proof-of-concept experiment that includes major servers of archival content, including Wikipedia and the Internet Archive. The Memento solution is based on existing HTTP capabilities applied in a novel way to add the temporal dimension. The result is a framework in which archived resources can seamlessly be reached via the URI of their original: protocol-based time travel for the Web.",
	archivePrefix = "arXiv",
	author = "Herbert {Van de Sompel} and Michael L. Nelson and Robert Sanderson and Lyudmila L. Balakireva and Scott Ainsworth and Harihar Shankar",
	citeulike-article-id = "6081565",
	citeulike-linkout-0 = "http://arxiv.org/abs/0911.1112",
	citeulike-linkout-1 = "http://arxiv.org/pdf/0911.1112",
	day = "6",
	eprint = "0911.1112",
	month = "November",
	posted-at = "2010-01-15 16:54:27",
	priority = "0",
	title = "Memento: Time Travel for the Web",
	url = "http://arxiv.org/abs/0911.1112",
	year = "2009"
}

@Misc{ citeulike:6604311,
	abstract = "The X-Map system for automated semantic correlation between multiple schemas represents a novel perspective for mediating XML data interoperability between multiple heterogeneous systems. Instead of using time consuming data-domain modeling or other pre-constructed knowledge about the systems' data-domain, X-Map relies on three techniques to do its job, none of which require pre-construction of anything: relevant definition of equivalence and other classes of relations, speculative evaluation of relations between data-domains, and a regenerative association engine to reevaluate speculations. Restriction to valid XML data also gives X-Map some advantage, including well-formedness, parsing, and data element identification. Essentially, X-Map uses the definition of equivalence to plant seeds of possible correlation between schema elements, speculates on the relation's correctness, then continuously re-evaluates the correctness of the speculative relations when given additional information from newly introduced schemas. In effect, X-Map generates and harbors a large growing body of knowledge between the elements of the many schemas that it encounters. Furthermore, this knowledge is stored in valid XML format (XLink linkbase and standard valid XML), enabling other XML-compliant applications to perform related mediation tasks such as data transformation and translation.",
	author = "David Wang and Amar Gupta and Arthur C. Smith",
	citeulike-article-id = "6604311",
	citeulike-linkout-0 = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.23.8744",
	keywords = "interoperability, schema, semantic",
	posted-at = "2010-01-29 16:23:55",
	priority = "2",
	review-note = "NOTE(review): no journal is recorded and the attached file name suggests a master's thesis -- confirm the publication venue and pick the matching entry type",
	title = "Automated Semantic Correlation between Multiple Schema for Information Exchange",
	url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.23.8744",
	year = "2000",
	file = "{:references:metadata.semantics:wang.2000.master.thesis.automated\_semantic\_correlation\_schema.pdf|}"
}

@article{citeulike:6548923,
	author = {Mary Westell},
	title = {Institutional repositories: proposed indicators of success},
	journal = {Library Hi Tech},
	volume = {24},
	number = {2},
	pages = {211--226},
	year = {2006},
	doi = {10.1108/07378830610669583},
	url = {http://dx.doi.org/10.1108/07378830610669583},
	keywords = {canada, critical, digital, factors, file-import-10-01-16, libraries, research, success},
	abstract = {Purpose -- This paper proposes indicators for measuring the success of institutional repositories based on their demonstrated integration with other research initiatives and provides a snapshot of the current state of selected institutional repositories in Canada through a review of their web presence and their integration with university library and research pages. Design/methodology/approach -- Using the proposed indicators, an examination of the web sites of selected Canadian universities who are participating in the Canadian Association of Research Libraries Institutional Repository project was undertaken. Findings -- Institutional repositories are growing in Canada and that the Canadian {IR} community is on the way to the proposed model future -- integration with existing university research practices. Originality/value -- Indicators such as those proposed in the paper can provide a basic framework for evaluating {IR} projects and highlight areas where the library can generate additional support for these worthwhile projects.},
	citeulike-article-id = {6548923},
	citeulike-linkout-0 = {http://dx.doi.org/10.1108/07378830610669583},
	citeulike-linkout-1 = {http://www.emeraldinsight.com/10.1108/07378830610669583},
	posted-at = {2010-01-16 01:44:35},
	priority = {2}
}

@inproceedings{citeulike:2121847,
	author = {Dietmar Wolfram and Hope A. Olson},
	title = {A Method for Comparing Large Scale Inter-Indexer Consistency Using IR Modeling},
	booktitle = {Canadian Association for Information Science},
	organization = {Canadian Association for Information Science},
	year = {2007},
	url = {http://www.cais-acsi.ca/proceedings/2007/wolfram_2007.pdf},
	abstract = {The authors present a method for comparing indexing consistency between groups of indexers based on the vector space IR model. Terms assigned by indexers are treated as vectors whose distances from a central vector may be compared. The method is outlined and demonstrated with an example.},
	citeulike-article-id = {2121847},
	citeulike-linkout-0 = {http://www.cais-acsi.ca/proceedings/2007/wolfram\\_2007.pdf},
	posted-at = {2010-01-15 14:07:48},
	priority = {2}
}

@article{citeulike:4107599,
	author = {Jun Wang},
	title = {Automatic thesaurus development: Term extraction from title metadata},
	journal = {Journal of the American Society for Information Science and Technology},
	volume = {57},
	number = {7},
	pages = {907--920},
	year = {2006},
	doi = {10.1002/asi.20352},
	url = {http://dx.doi.org/10.1002/asi.20352},
	address = {Department of Information Management, Peking University, Beijing 10071, China},
	keywords = {extraction, metadata, thesaurus},
	abstract = {The application of thesauri in networked environments is seriously hampered by the challenges of introducing new concepts and terminology into the formal controlled vocabulary, which is critical for enhancing its retrieval capability. The author describes an automated process of adding new terms to thesauri as entry vocabulary by analyzing the association between words/phrases extracted from bibliographic titles and subject descriptors in the metadata record (subject descriptors are terms assigned from controlled vocabularies of thesauri to describe the subjects of the objects [e.g., books, articles] represented by the metadata records). The investigated approach uses a corpus of metadata for scientific and technical (S\&T) publications in which the titles contain substantive words for key topics. The three steps of the method are (a) extracting words and phrases from the title field of the metadata; (b) applying a method to identify and select the specific and meaningful keywords based on the associated controlled vocabulary terms from the thesaurus used to catalog the objects; and (c) inserting selected keywords into the thesaurus as new terms (most of them are in hierarchical relationships with the existing concepts), thereby updating the thesaurus with new terminology that is being used in the literature. The effectiveness of the method was demonstrated by an experiment with the Chinese Classification Thesaurus (CCT) and bibliographic data in China Machine-Readable Cataloging Record (MARC) format (CNMARC) provided by Peking University Library. This approach is equally effective in large-scale collections and in other languages.},
	citeulike-article-id = {4107599},
	posted-at = {2009-06-30 16:45:53},
	priority = {2},
	file = {{:references:001:meta.autogen01.pdf|}}
}

@Article{ citeulike:4926312,
	address = "Los Alamitos, CA, USA",
	author = "Yorick Wilks",
	citeulike-article-id = "4926312",
	doi = "10.1109/MIS.2008.53",
	journal = "IEEE Intelligent Systems",
	keywords = "semantic\_web, semantics",
	number = "3",
	pages = "41--49",
	posted-at = "2009-06-30 05:55:48",
	priority = "3",
	publisher = "IEEE Computer Society",
	title = "The {Semantic Web}: Apotheosis of Annotation, but What Are Its Semantics?",
	url = "http://dx.doi.org/10.1109/MIS.2008.53",
	volume = "23",
	year = "2008"
}

@Article{ citeulike:2641720,
	abstract = "This paper presents an outline of models of information seeking and other aspects of information behaviour, showing the relationship between communication and information behaviour in general with information seeking and information searching in information retrieval systems. It is suggested that these models address issues at various levels of information behaviour and that they can be related by envisaging a `nesting' of models. It is also suggested that, within both information seeking research and information searching research, alternative models address similar issues in related ways and that the models are complementary rather than conflicting. Finally, an alternative, problem-solving model is presented, which, it is suggested, provides a basis for relating the models in appropriate research strategies.",
	author = "T. D. Wilson",
	citeulike-article-id = "2641720",
	citeulike-linkout-0 = "http://dx.doi.org/10.1108/EUM0000000007145",
	citeulike-linkout-1 = "http://www.ingentaconnect.com/content/mcb/278/1999/00000055/00000003/art00001",
	citeulike-linkout-2 = "http://informationr.net/tdw/publ/papers/1999JDoc.html",
	doi = "10.1108/EUM0000000007145",
	issn = "0022-0418",
	journal = "Journal of Documentation",
	keywords = "information\_behaviour",
	number = "3",
	pages = "249--270",
	posted-at = "2009-09-15 23:34:24",
	priority = "3",
	publisher = "Emerald Group Publishing Limited",
	title = "Models in information behaviour research",
	url = "http://dx.doi.org/10.1108/EUM0000000007145",
	volume = "55",
	year = "1999",
	file = "{:references:phd1st:glis702:wilson.model.in.information.behaviour.research.pdf|}"
}

@Article{ citeulike:1202321,
	abstract = "This paper provides a history and overview of the field of human information behavior, including recent advances in the field and multidisciplinary perspectives. Keywords: human information behavior, information seeking, research, user studies. Introduction: Until recently the computer science and information systems communities have equated `information requirements' of users with the way users behave in relation to the systems available. In other words, investigations into information...",
	author = "T. D. Wilson",
	citeulike-article-id = "1202321",
	citeulike-linkout-0 = "http://citeseer.ist.psu.edu/403930.html",
	citeulike-linkout-1 = "http://citeseer.lcs.mit.edu/403930.html",
	citeulike-linkout-2 = "http://citeseer.ifi.unizh.ch/403930.html",
	citeulike-linkout-3 = "http://citeseer.comp.nus.edu.sg/403930.html",
	journal = "Informing Science",
	keywords = "information\_behaviour",
	number = "2",
	pages = "49--56",
	posted-at = "2009-09-15 23:38:47",
	priority = "3",
	title = "Human information behavior",
	url = "http://citeseer.ist.psu.edu/403930.html",
	volume = "3",
	year = "2000",
	file = "{:references:phd1st:glis702:wilson.information.seeking.behaviour.pdf|}"
}

@Article{ citeulike:4008885,
	author = "T. D. Wilson",
	citeulike-article-id = "4008885",
	citeulike-linkout-0 = "http://dx.doi.org/10.1108/00220410610714895",
	doi = "10.1108/00220410610714895",
	journal = "Journal of Documentation",
	keywords = "information\_behaviour",
	number = "6",
	pages = "658--670",
	posted-at = "2009-09-15 23:41:53",
	priority = "3",
	title = "On user studies and information needs",
	url = "http://dx.doi.org/10.1108/00220410610714895",
	volume = "62",
	year = "2006",
	file = "{:references:phd1st:glis702:wilson.information.needs.pdf|}"
}

@Article{ citeulike:1727830,
	address = "New York, NY, USA",
	author = "Tianhao Wu and William M. Pottenger",
	citeulike-article-id = "1727830",
	doi = "10.1002/asi.v56:3",
	issn = "1532-2882",
	journal = "Journal of the American Society for Information Science and Technology",
	keywords = "extraction, metadata",
	month = "February",
	number = "3",
	pages = "258--271",
	posted-at = "2009-06-30 06:42:33",
	priority = "2",
	publisher = "John Wiley \& Sons, Inc.",
	review-note = "NOTE(review): doi and url identify the journal issue (asi.v56:3), not this article; replace with the article-level DOI once confirmed",
	title = "A semi-supervised active learning algorithm for information extraction from textual data",
	url = "http://dx.doi.org/10.1002/asi.v56:3",
	volume = "56",
	year = "2005",
	file = "{:references:metadata.autogen:autogen00.pdf|}"
}

@article{citeulike:1371245,
	author = {Yi-Fang B. Wu and Quanzhi Li and Razvan S. Bot and Xin Chen},
	title = {Finding nuggets in documents: A machine learning approach},
	journal = {Journal of the American Society for Information Science and Technology},
	volume = {57},
	number = {6},
	pages = {740--752},
	year = {2006},
	doi = {10.1002/asi.20341},
	url = {http://dx.doi.org/10.1002/asi.20341},
	address = {Information Systems Department, New Jersey Institute of Technology, Newark, NJ 07102},
	keywords = {automatic\_metadata\_generation, machine\_learning},
	abstract = {Document keyphrases provide a concise summary of a document's content, offering semantic metadata summarizing a document. They can be used in many applications related to knowledge management and text mining, such as automatic text summarization, development of search engines, document clustering, document classification, thesaurus construction, and browsing interfaces. Because only a small portion of documents have keyphrases assigned by authors, and it is time-consuming and costly to manually assign keyphrases to documents, it is necessary to develop an algorithm to automatically generate keyphrases for documents. This paper describes a Keyphrase Identification Program (KIP), which extracts document keyphrases by using prior positive samples of human identified phrases to assign weights to the candidate keyphrases. The logic of our algorithm is: The more keywords a candidate keyphrase contains and the more significant these keywords are, the more likely this candidate phrase is a keyphrase. KIP's learning function can enrich the glossary database by automatically adding new identified keyphrases to the database. KIP's personalization feature will let the user build a glossary database specifically suitable for the area of his/her interest. The evaluation results show that KIP's performance is better than the systems we compared to and that the learning function is effective.},
	citeulike-article-id = {1371245},
	posted-at = {2009-06-30 07:11:32},
	priority = {2},
	file = {{:references:metadata.autogen:autogen05.pdf|}}
}

@InCollection{ citeulike:6443492,
	abstract = "Large amounts of data in modern information systems, such as the World Wide Web, require innovative information retrieval techniques to effectively satisfy users' information need. A promising approach is to exploit document semantics in the IR process. For this purpose, high-quality semantic metadata is needed. This paper introduces a method to automatically create semantic metadata by using ontologically enhanced versions of common information extraction methods, such as named entity recognition and coreference resolution. Furthermore, this work also proposes the application of ontology-specific heuristic rules to further improve the quality of generated metadata. The results of our method was evaluated using a small test collection.",
	author = {{\"U}mit Yoldas and G{\'a}bor Nagyp{\'a}l},
	booktitle = "On the Move to Meaningful Internet Systems 2006: CoopIS, DOA, GADA, and ODBASE",
	chapter = "48",
	citeulike-article-id = "6443492",
	citeulike-linkout-0 = "http://dx.doi.org/10.1007/11914853_48",
	citeulike-linkout-1 = "http://www.springerlink.com/content/hvq2024wwh826425",
	doi = "10.1007/11914853\_48",
	pages = "791--806",
	posted-at = "2010-01-15 17:44:52",
	priority = "2",
	publisher = "Springer",
	series = "Lecture Notes in Computer Science",
	title = "Ontology Supported Automatic Generation of High-Quality Semantic Metadata",
	url = "http://dx.doi.org/10.1007/11914853_48",
	year = "2006"
}

@InProceedings{ 1670651,
	author = "Qing Zou and Wei Fan",
	title = "A semantic {MediaWiki}-empowered terminology registry",
	booktitle = "DCMI '09: Proceedings of the 2009 International Conference on Dublin Core and Metadata Applications",
	year = "2009",
	pages = "107--112",
	location = "Seoul, Korea",
	publisher = "Dublin Core Metadata Initiative"
}

@article{citeulike:6551045,
	author = {Marcia L. Zeng and Jaesun Lee and Allene F. Hayes},
	title = {Metadata Decisions for Digital Libraries: A Survey Report},
	journal = {Journal of Library Metadata},
	volume = {9},
	number = {3},
	pages = {173--193},
	year = {2009},
	publisher = {Routledge},
	doi = {10.1080/19386380903405074},
	url = {http://dx.doi.org/10.1080/19386380903405074},
	keywords = {digital\_library, metadata},
	abstract = {A survey on metadata conducted at the end of 2007 received over 400 answers from 49 countries all over the world. It helped the authors to identify major issues and concerns regarding metadata that should be addressed in the IFLA Guidelines for Digital Libraries. The questionnaire included a question of the roles respondents may have, and five questions of the major concerns in any project that relates to metadata, regarding design and planning of digital projects, element set standards, data contents in a record, authority files and controlled vocabularies, and metadata encoding. Findings from the survey are reported and a workflow chart is included in this paper.},
	citeulike-article-id = {6551045},
	citeulike-linkout-0 = {http://dx.doi.org/10.1080/19386380903405074},
	posted-at = {2010-01-16 16:55:58},
	priority = {2}
}

@Article{ citeulike:6545178,
	abstract = "The study consisted of two interrelated parts: (1) a quality analysis of the Chinese-language records in the OCLC database, with emphasis on identifying errors in member-contributed records; and (2) the development of a rule-based data validation system for quality control of Chinese-language records in the OCLC database, with emphasis on establishing a set of production rules for such a system. One thousand three hundred six member-contributed Chinese records were randomly selected from the OCLC database and were examined by the researcher. Commonly occurring errors were identified and were categorized into three classes: format errors, content deficiency and inconsistency errors, and typographical errors of editing and inputting. The relationship between the number of times a record had been enhanced and errors still occurring in it was also studied.",
	author = "Lei Zeng",
	citeulike-article-id = "6545178",
	citeulike-linkout-0 = "http://dx.doi.org/10.1300/J104v16n04_03",
	doi = "10.1300/J104v16n04\_03",
	journal = "Cataloging \& Classification Quarterly",
	keywords = "quality",
	number = "4",
	pages = "25--66",
	posted-at = "2010-01-15 20:33:50",
	priority = "0",
	publisher = "Routledge",
	title = "Quality Control of {Chinese}-Language Records Using a Rule-Based Data Validation System -- Part 1 -- An Evaluation of the Quality of {Chinese}-Language Records in the {OCLC} {OLUC} Database",
	url = "http://dx.doi.org/10.1300/J104v16n04_03",
	volume = "16",
	year = "1993"
}

@Article{ citeulike:6545182,
	abstract = "The problem addressed by this two-part study is to evaluate the quality of Chinese records in the OCLC database and to determine the potential of a set of production rules for a rule-based data validation system to support quality control of the Chinese records. The second part of the study emphasizes establishing production rules for such a system. Based on the results of error analysis, a set of production rules were developed and tested, focusing on improving completeness, consistency, and correctness of a record. The rules covered 11 of the total 19 types of errors. At least 65\% of the errors occurring in the investigated sample records could be detected automatically by applying the production rules.",
	author = "Lei Zeng",
	citeulike-article-id = "6545182",
	citeulike-linkout-0 = "http://dx.doi.org/10.1300/J104v18n01_02",
	doi = "10.1300/J104v18n01\_02",
	journal = "Cataloging \& Classification Quarterly",
	keywords = "quality, quality\_metrics",
	number = "1",
	pages = "3--26",
	posted-at = "2010-01-15 20:37:07",
	priority = "2",
	publisher = "Routledge",
	title = "Quality Control of {Chinese}-Language Records Using a Rule-Based Data Validation System -- Part 2 -- A Study of a Rule-Based Data Validation System for Online {Chinese} Cataloging",
	url = "http://dx.doi.org/10.1300/J104v18n01_02",
	volume = "18",
	year = "1994"
}

@Article{ citeulike:4867619,
	author = "Marcia L. Zeng and Lois M. Chan",
	citeulike-article-id = "4867619",
	doi = "10.1045/june2006-zeng",
	journal = "D-Lib Magazine",
	keywords = "interoperability, metadata",
	month = "June",
	number = "6",
	posted-at = "2009-06-30 17:07:04",
	priority = "2",
	title = "Metadata Interoperability and Standardization -- A Study of Methodology, Part {II}: Achieving Interoperability at the Record and Repository Levels",
	url = "http://dx.doi.org/10.1045/june2006-zeng",
	volume = "12",
	year = "2006",
	file = "{:references:metadata:zeng.2006.dlib.metadata\_interoperability.pdf|}"
}

@Book{ citeulike:5025939,
	abstract = "In this new, authoritative textbook, internationally recognized metadata experts Zeng and Qin have created a comprehensive primer for advanced undergraduate, graduate, or continuing education courses in information organization, information technology, cataloging, digital libraries, electronic archives, and, of course, metadata. Instructors seeking a text that covers the theory as well as the how-to's of application design, implementation, and evaluation will find it here. An outcome-based approach lets learners with different orientations adapt their new knowledge and skills to any domain. Examples and practice problems focus on tasks typical to all metadata application projects. Other useful features include sample problems with solutions, quizzes, hands-on tutorials, and a recommended reading list at the end of each chapter.",
	address = "New York",
	author = "Marcia L. Zeng and Jian Qin",
	citeulike-article-id = "5025939",
	edition = "First",
	isbn = "1555706355",
	keywords = "metadata",
	pages = "1--365",
	posted-at = "2009-06-30 19:59:51",
	priority = "0",
	publisher = "Neal-Schuman Publishers",
	title = "Metadata",
	year = "2008"
}

@Article{ citeulike:449907,
	abstract = "Metadata is designed to improve information organization and information retrieval effectiveness and efficiency on the Internet. The way web publishers respond to metadata and the way they use it when publishing their web pages, however, is still a mystery. The authors of this paper aim to solve this mystery by defining different professional publisher groups, examining the behaviors of these user groups, and identifying the characteristics of their metadata use. This study will enhance the current understanding of metadata application behavior and provide evidence useful to researchers, web publishers, and search engine designers.",
	author = "Jin Zhang and Iris Jastram",
	citeulike-article-id = "449907",
	citeulike-linkout-0 = "http://dx.doi.org/10.1016/j.ipm.2005.05.002",
	citeulike-linkout-1 = "http://www.sciencedirect.com/science/article/B6VC8-4GFCR46-1/2/5321b28f42e0511148600490ef64b23f",
	doi = "10.1016/j.ipm.2005.05.002",
	journal = "Information Processing \& Management",
	keywords = "metadata, metadata\_creation, metadata\_quality",
	month = "July",
	number = "4",
	pages = "1099--1122",
	posted-at = "2010-01-07 13:39:56",
	priority = "3",
	title = "A study of the metadata creation behavior of different user groups on the {Internet}",
	url = "http://dx.doi.org/10.1016/j.ipm.2005.05.002",
	volume = "42",
	year = "2006",
	file = "{:references:metadata.quality:zhang.j.2006.ipm.metadata\_creation\_behavior.pdf|}"
}

@InProceedings{ citeulike:2408042,
	abstract = "Most information retrieval systems on the Internet rely primarily on similarity ranking algorithms based solely on term frequency statistics. Information quality is usually ignored. This leads to the problem that documents are retrieved without regard to their quality. We present an approach that combines similarity-based similarity ranking with quality ranking in centralized and distributed search environments. Six quality metrics, including the currency, availability, information-to-noise...",
	author = "Xiaolan Zhu and Susan Gauch",
	booktitle = "Research and Development in Information Retrieval",
	citeulike-article-id = "2408042",
	citeulike-linkout-0 = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.40.1164",
	keywords = "misc",
	pages = "288--295",
	posted-at = "2008-02-21 15:54:42",
	priority = "0",
	title = "Incorporating quality metrics in centralized/distributed information retrieval on the {World Wide Web}",
	url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.40.1164",
	year = "2000"
}

@Article{ citeulike:4162566,
	author = "Qing Zou and Guoying Liu",
	citeulike-article-id = "4162566",
	doi = "10.1108/00330330910934101",
	issn = "0033-0337",
	journal = "Program: electronic library \& information systems",
	number = "1",
	pages = "49--61",
	posted-at = "2009-03-10 18:57:46",
	publisher = "Emerald Group Publishing Limited",
	title = "{Chinese} localisation of {Evergreen}: an open source integrated library system",
	url = "http://dx.doi.org/10.1108/00330330910934101",
	volume = "43",
	year = "2009",
	file = "{:references:my\_published\_articles:2009.chinese\_evergreen\_emerald\_program.pdf|}"
}

@Article{ citeulike:6545152,
	author = "Pranas Zunde and Margaret E. Dexter",
	citeulike-article-id = "6545152",
	journal = "American Documentation",
	month = "July",
	number = "3",
	pages = "259--267",
	posted-at = "2010-01-15 20:13:40",
	priority = "2",
	title = "Indexing Consistency and Quality",
	volume = "20",
	year = "1969"
}