% Bibliography: publications by Frieda Josi and co-authors (2018--2019).
% Layout convention: one field per line; bibliographic fields first, then
% identifiers, then abstract/subject/language metadata.

% Master's thesis (German). Typed as mastersthesis so that the school and
% the type label are actually rendered; nouns in the German title are braced
% so that sentence-casing styles do not lowercase them.
@mastersthesis{Josi2018,
  author   = {Josi, Frieda},
  title    = {Textbasierte {Annotation} von {Abbildungen} mit {Kategorien} von {Wikimedia}},
  type     = {Master Thesis},
  school   = {Hochschule Hannover},
  year     = {2018},
  doi      = {10.25968/opus-1194},
  url      = {http://nbn-resolving.de/urn:nbn:de:bsz:960-opus4-11949},
  abstract = {In der vorliegenden Masterarbeit geht es um die automatische Annotation von Bildern mithilfe der Kategoriesystematik der Wikipedia. Die Annotation soll anhand der Bildbeschriftungen und ihren Textreferenzen erfolgen. Hierbei wird f{\"u}r vorhandene Bilder eine passende Kategorie vorgeschlagen. Es handelt sich bei den Bildern um Abbildungen aus naturwissenschaftlichen Artikeln, die in Open Access Journals ver{\"o}ffentlicht wurden. Ziel der Arbeit ist es, ein konzeptionelles Verfahren zu erarbeiten, dieses anhand einer ausgew{\"a}hlten Anzahl von Bildern durchzuf{\"u}hren und zu evaluieren. Die Abbildungen sollen f{\"u}r weitere Forschungsarbeiten und f{\"u}r die Projekte der Wikimedia Foundation zur Verf{\"u}gung stehen. Das Annotationsverfahren findet im Projekt NOA - Nachnutzung von Open Access Abbildungen Verwendung.},
  language = {de},
}

% Workshop paper in the DEXA 2018 proceedings. The editor, series name and
% volume number were previously fused into the booktitle string (and copied
% into series); they are now stored in their own fields.
% NOTE(review): only the editor's initial ("M.") is available here --
% expand to the full given name once confirmed.
@inproceedings{JosiWartenaCharbonnier2018,
  author    = {Josi, Frieda and Wartena, Christian and Charbonnier, Jean},
  title     = {Text-based annotation of scientific images using {Wikimedia} categories},
  editor    = {Elloumi, M. and others},
  booktitle = {Database and Expert Systems Applications. {DEXA} 2018},
  series    = {Communications in Computer and Information Science},
  volume    = {903},
  pages     = {243--253},
  publisher = {Springer},
  address   = {Cham},
  year      = {2018},
  isbn      = {978-3-319-99132-0},
  doi       = {10.1007/978-3-319-99133-7_20},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:960-opus4-12488},
  abstract  = {The reuse of scientific raw data is a key demand of Open Science. In the project NOA we foster reuse of scientific images by collecting and uploading them to Wikimedia Commons. In this paper we present a text-based annotation method that proposes Wikipedia categories for open access images. The assigned categories can be used for image retrieval or to upload images to Wikimedia Commons. The annotation basically consists of two phases: extracting salient keywords and mapping these keywords to categories. The results are evaluated on a small record of open access images that were manually annotated.},
  language  = {en},
}

% Workshop paper in the CIKM 2018 workshop proceedings. The series field
% was an exact copy of booktitle and has been dropped.
% NOTE(review): the ISSN presumably identifies the proceedings series --
% confirm the series name and volume number and add them as series/volume.
% NOTE(review): the workshop took place in 2018 while year is 2019;
% presumably the publication year of the proceedings -- confirm.
@inproceedings{JosiWartena2019,
  author    = {Josi, Frieda and Wartena, Christian},
  title     = {Structural Analysis of Contract Renewals},
  editor    = {Cuzzocrea, Alfredo and Bonchi, Francesco and Gunopulos, Dimitris},
  booktitle = {Proceedings of the {CIKM} 2018 Workshops, Torino, Italy, October 22, 2018},
  year      = {2019},
  issn      = {1613-0073},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:960-opus4-15139},
  abstract  = {In the present paper we sketch an automated procedure to compare different versions of a contract. The contract texts used for this purpose are structurally differently composed PDF files that are converted into structured XML files by identifying and classifying text boxes. A classifier trained on manually annotated contracts achieves an accuracy of 87\% on this task. We align contract versions and classify aligned text fragments into different similarity classes that enhance the manual comparison of changes in document versions. The main challenges are to deal with OCR errors and different layout of identical or similar texts. We demonstrate the procedure using some freely available contracts from the City of Hamburg written in German. The methods, however, are language agnostic and can be applied to other contracts as well.},
  subject   = {Vertrag},
  language  = {en},
}

% Workshop paper at RELATIONS 2019. The series field was an exact copy of
% booktitle and has been dropped.
@inproceedings{JosiWartenaHeid2019,
  author    = {Josi, Frieda and Wartena, Christian and Heid, Ulrich},
  title     = {Detecting Paraphrases of Standard Clause Titles in Insurance Contracts},
  booktitle = {{RELATIONS} - Workshop on meaning relations between phrases and sentences (May 23, 2019, Gothenburg, Sweden)},
  pages     = {23--33},
  year      = {2019},
  isbn      = {978-1-950737-22-2},
  doi       = {10.25968/opus-1337},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:960-opus4-13375},
  abstract  = {For the analysis of contract texts, validated model texts, such as model clauses, can be used to identify used contract clauses. This paper investigates how the similarity between titles of model clauses and headings extracted from contracts can be computed, and which similarity measure is most suitable for this. For the calculation of the similarities between title pairs we tested various variants of string similarity and token based similarity. We also compare two additional semantic similarity measures based on word embeddings using pre-trained embeddings and word embeddings trained on contract texts. The identification of the model clause title can be used as a starting point for the mapping of clauses found in contracts to verified clauses.},
  subject   = {Paraphrase},
  language  = {en},
}