Institute for Parallel and Distributed Systems (IPVS)

Publications

An overview of publications of the Institute for Parallel and Distributed Systems.

Publications AS: Bibliography 2019 BibTeX

 
@inproceedings{INPROC-2019-32,
   author = {Vitali Hirsch and Peter Reimann and Bernhard Mitschang},
   title = {{Data-Driven Fault Diagnosis in End-of-Line Testing of Complex Products}},
   booktitle = {Proceedings of the 6th IEEE International Conference on Data Science and Advanced Analytics (DSAA 2019), Washington, D.C., USA},
   publisher = {IEEE Xplore},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Conference Paper},
   month = oct,
   year = {2019},
   keywords = {decision support; classification; ensembles; automotive; fault diagnosis; quality management; sampling},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Machine learning approaches may support various use cases in the manufacturing industry. However, these approaches often do not address the inherent characteristics of the real manufacturing data at hand. In fact, real data impose analytical challenges that have a strong influence on the performance and suitability of machine learning methods. This paper considers such a challenging use case in the area of End-of-Line testing, i.e., the final functional check of complex products after the whole assembly line. Here, classification approaches may be used to support quality engineers in identifying faulty components of defective products. For this, we discuss relevant data sources and their characteristics, and we derive the resulting analytical challenges. We have identified a set of sophisticated data-driven methods that may be suitable to our use case at first glance, e.g., methods based on ensemble learning or sampling. The major contribution of this paper is a thorough comparative study of these methods to identify whether they are able to cope with the analytical challenges. This comprises the discussion of both fundamental theoretical aspects and major results of detailed experiments we have performed on the real data of our use case.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-32&engl=1}
}
@inproceedings{INPROC-2019-20,
   author = {Manuel Fritz and Holger Schwarz},
   title = {{Initializing k-Means Efficiently: Benefits for Exploratory Cluster Analysis}},
   booktitle = {On the Move to Meaningful Internet Systems: OTM 2019 Conferences},
   editor = {Herv{\'e} Panetto and Christophe Debruyne and Martin Hepp and Dave Lewis and Claudio Agostino Ardagna and Robert Meersman},
   publisher = {Springer Nature Switzerland AG},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Computer Science (LNCS)},
   volume = {11877},
   pages = {146--163},
   type = {Conference Paper},
   month = jan,
   year = {2019},
   isbn = {978-3-030-33245-7},
   doi = {10.1007/978-3-030-33246-4_9},
   keywords = {Exploratory cluster analysis; k-Means; Initialization},
   language = {English},
   cr-category = {E.0 Data General,     H.2.8 Database Applications,     H.3.3 Information Search and Retrieval},
   ee = {https://link.springer.com/chapter/10.1007/978-3-030-33246-4_9},
   contact = {manuel.fritz@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Data analysis is a highly exploratory task, where various algorithms with different parameters are executed until a solid result is achieved. This is especially evident for cluster analyses, where the number of clusters must be provided prior to the execution of the clustering algorithm. Since this number is rarely known in advance, the algorithm is typically executed several times with varying parameters. Hence, the duration of the exploratory analysis heavily depends on the runtime of each execution of the clustering algorithm. While previous work shows that the initialization of clustering algorithms is crucial for fast and solid results, it solely focuses on a single execution of the clustering algorithm and thereby neglects previous executions. We propose Delta Initialization as an initialization strategy for k-Means in such an exploratory setting. The core idea of this new algorithm is to exploit the clustering results of previous executions in order to enhance the initialization of subsequent executions. We show that this algorithm is well suited for exploratory cluster analysis as considerable speedups can be achieved while additionally achieving superior clustering results compared to state-of-the-art initialization strategies.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-20&engl=1}
}
@inproceedings{INPROC-2019-19,
   author = {Christoph Stach},
   title = {{VAULT: A Privacy Approach towards High-Utility Time Series Data}},
   booktitle = {Proceedings of the Thirteenth International Conference on Emerging Security Information, Systems and Technologies: SECURWARE 2019},
   publisher = {IARIA},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--6},
   type = {Conference Paper},
   month = oct,
   year = {2019},
   keywords = {Privacy; Time Series; Projection; Selection; Aggregation; Interpolation; Smoothing; Information Emphasization; Noise},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     D.4.6 Operating Systems Security and Protection},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {While the Internet of Things (IoT) is a key driver for Smart Services that greatly facilitate our everyday life, it also poses a serious threat to privacy. Smart Services collect and analyze a vast amount of (partly private) data and thus gain valuable insights concerning their users. To prevent this, users have to balance service quality (i.e., reveal a lot of private data) and privacy (i.e., waive many features). Current IoT privacy approaches do not reflect this discrepancy properly and are often too restrictive as a consequence. For this reason, we introduce VAULT, a new approach for the protection of private data. VAULT is tailored to time series data as used by the IoT. It achieves a good tradeoff between service quality and privacy. For this purpose, VAULT applies five different privacy techniques. Our implementation of VAULT adopts a Privacy by Design approach.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-19&engl=1}
}
@inproceedings{INPROC-2019-18,
   author = {Dominik Lucke and Frank Steimle and Emir Cuk and Michael Luckert and Matthias Schneider and Daniel Schel},
   title = {{Implementation of the MIALinx User Interface for Future Manufacturing Environments}},
   booktitle = {Proceedings of the 52nd CIRP Conference on Manufacturing Systems (CMS), Ljubljana, Slovenia, June 12-14, 2019},
   publisher = {Elsevier},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Procedia CIRP},
   volume = {81},
   pages = {606--611},
   type = {Conference Paper},
   month = jun,
   year = {2019},
   doi = {10.1016/j.procir.2019.03.163},
   keywords = {Manufacturing; Smart Factory; Industrie 4.0; Manufacturing Service Bus; Rules; Integration; User Interface},
   language = {English},
   cr-category = {H.4.0 Information Systems Applications General,     I.2.1 Applications and Expert Systems},
   ee = {http://www.sciencedirect.com/science/article/pii/S2212827119304688},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The flexible and easy-to-use integration of production equipment and IT systems on the shop floor becomes more and more a success factor for manufacturing to adapt rapidly to changing situations. The approach of the Manufacturing Integration Assistant (MIALinx) is to simplify this challenge. The integration steps range from integrating sensors over collecting and rule-based processing of sensor information to the execution of required actions. This paper presents the implementation of MIALinx to retrofit legacy machines for Industry 4.0 in a manufacturing environment and focus on the concept and implementation of the easy-to-use user interface as a key element.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-18&engl=1}
}
@inproceedings{INPROC-2019-16,
   author = {Marco Spie{\ss} and Peter Reimann},
   title = {{Angepasstes Item Set Mining zur gezielten Steuerung von Bauteilen in der Serienfertigung von Fahrzeugen}},
   booktitle = {Tagungsband der 18. Konferenz Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2019)},
   publisher = {Gesellschaft f{\"u}r Informatik (GI)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {119--128},
   type = {Conference Paper},
   month = mar,
   year = {2019},
   language = {German},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Qualit{\"a}tsprobleme im Bereich Fahrzeugbau k{\"o}nnen nicht nur zum Imageverlust des Unternehmens f{\"u}hren, sondern auch mit entsprechend hohen Kosten einhergehen. Wird ein Bauteil als Verursacher eines Qualit{\"a}tsproblems identifiziert, muss dessen Verbau gestoppt werden. Mit einer Datenanalyse kann herausgefunden werden, welche Fahrzeugkonfigurationen Probleme mit diesem fehlerverursachenden Bauteil haben. Im Rahmen der dom{\"a}nenspezifischen Problemstellung wird in diesem Beitrag die Anwendbarkeit von Standardalgorithmen aus dem Bereich Data-Mining untersucht. Da die Analyseergebnisse auf Standardausstattungen hinweisen, sind diese nicht zielf{\"u}hrend. F{\"u}r dieses Businessproblem von Fahrzeugherstellern haben wir einen Data-Mining Algorithmus entwickelt, der das Vorgehen des Item Set Mining der Assoziationsanalyse an das dom{\"a}nenspezifische Problem anpasst. Er unterscheidet sich zum klassischen Apriori-Algorithmus in der Beschneidung des Ergebnisraumes sowie in der nachfolgenden Aufbereitung und Verwendungsweise der Item Sets. Der Algorithmus ist allgemeing{\"u}ltig f{\"u}r alle Fahrzeughersteller anwendbar. Die Ergebnisse sind anhand eines realen Anwendungsfalls evaluiert worden, bei dem durch die Anwendung unseres Algorithmus 87\% der Feldausf{\"a}lle verhindert werden k{\"o}nnen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-16&engl=1}
}
@inproceedings{INPROC-2019-15,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz},
   title = {{Modeling Data Lakes with Data Vault: Practical Experiences, Assessment, and Lessons Learned}},
   booktitle = {Proceedings of the 38th Conference on Conceptual Modeling (ER 2019)},
   publisher = {Springer},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--14},
   type = {Conference Paper},
   month = nov,
   year = {2019},
   keywords = {Data Lakes; Data Vault; Data Modeling; Industry Experience; Assessment; Lessons Learned},
   language = {English},
   cr-category = {H.2.1 Database Management Logical Design},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Data lakes have become popular to enable organization-wide analytics on heterogeneous data from multiple sources. Data lakes store data in their raw format and are often characterized as schema-free. Nevertheless, it turned out that data still need to be modeled, as neglecting data modeling may lead to issues concerning e.g., quality and integration. In current research literature and industry practice, Data Vault is a popular modeling technique for structured data in data lakes. It promises a flexible, extensible data model that preserves data in their raw format. However, hardly any research or assessment exist on the practical usage of Data Vault for modeling data lakes. In this paper, we assess the Data Vault model's suitability for the data lake context, present lessons learned, and investigate success factors for the use of Data Vault. Our discussion is based on the practical usage of Data Vault in a large, global manufacturer's data lake and the insights gained in real-world analytics projects.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-15&engl=1}
}
@inproceedings{INPROC-2019-14,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz},
   title = {{Leveraging the Data Lake - Current State and Challenges}},
   booktitle = {Proceedings of the 21st International Conference on Big Data Analytics and Knowledge Discovery (DaWaK'19)},
   publisher = {Springer Nature},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--10},
   type = {Conference Paper},
   month = aug,
   year = {2019},
   keywords = {Data Lakes; State of the Art; Challenges},
   language = {English},
   cr-category = {H.2.4 Database Management Systems,     H.2.8 Database Applications},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The digital transformation leads to massive amounts of heterogeneous data challenging traditional data warehouse solutions in enterprises. In order to exploit these complex data for competitive advantages, the data lake recently emerged as a concept for more flexible and powerful data analytics. However, existing literature on data lakes is rather vague and incomplete, and the various realization approaches that have been proposed neither cover all aspects of data lakes nor do they provide a comprehensive design and realization strategy. Hence, enterprises face multiple challenges when building data lakes. To address these shortcomings, we investigate existing data lake literature and discuss various design and realization aspects for data lakes, such as governance or data models. Based on these insights, we identify challenges and research gaps concerning (1) data lake architecture, (2) data lake governance, and (3) a comprehensive strategy to realize data lakes. These challenges still need to be addressed to successfully leverage the data lake in practice.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-14&engl=1}
}
@inproceedings{INPROC-2019-12,
   author = {Christoph Stach},
   title = {{Konzepte zum Schutz privater Muster in Zeitreihendaten: IoT-Anwendungen im Spannungsfeld zwischen Servicequalit{\"a}t und Datenschutz}},
   booktitle = {Informatik 2019: 50 Jahre Gesellschaft f{\"u}r Informatik -- Informatik f{\"u}r Gesellschaft, Tagungsband der 49. Jahrestagung der Gesellschaft f{\"u}r Informatik e.V. (GI), 23.09. - 26.09.2019, Kassel},
   publisher = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {1--14},
   type = {Conference Paper},
   month = sep,
   year = {2019},
   keywords = {Datenschutz; Zeitreihendaten; IoT; DSGVO; ePrivacy-Verordnung; TICK-Stack},
   language = {German},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     G.1.10 Numerical Analysis Applications},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Obwohl das Internet der Dinge (IoT) die Voraussetzung f{\"u}r smarte Anwendungen schafft, die signifikante Vorteile gegen{\"u}ber traditionellen Anwendungen bieten, stellt die zunehmende Verbreitung von IoT-f{\"a}higen Ger{\"a}ten auch eine immense Gef{\"a}hrdung der Privatheit dar. IoT-Anwendungen sammeln eine Vielzahl an Daten und senden diese zur Verarbeitung an ein leistungsstarkes Back-End. Hierbei werden umfangreiche Erkenntnisse {\"u}ber den Nutzer gewonnen. Erst dieses Wissen erm{\"o}glicht die Servicevielfalt die IoT-Anwendungen bieten. Der Nutzer muss daher einen Kompromiss aus Servicequalit{\"a}t und Datenschutz treffen. Heutige Datenschutzans{\"a}tze ber{\"u}cksichtigen dies unzureichend und sind dadurch h{\"a}ufig zu restriktiv. Aus diesem Grund stellen wir neue Konzepte zum Schutz privater Daten f{\"u}r das IoT vor. Diese ber{\"u}cksichtigen die speziellen Eigenschaften der im IoT zum Einsatz kommenden Zeitreihendaten. So kann die Privatheit des Nutzers gew{\"a}hrleistet werden, ohne die Servicequalit{\"a}t unn{\"o}tig einzuschr{\"a}nken. Basierend auf den TICK-Stack beschreiben wir Implementierungsans{\"a}tze f{\"u}r unsere Konzepte, die einem Privacy-by-Design-Ansatz folgen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-12&engl=1}
}
@inproceedings{INPROC-2019-10,
   author = {Christian Weber and Pascal Hirmer and Peter Reimann and Holger Schwarz},
   title = {{A New Process Model for the Comprehensive Management of Machine Learning Models}},
   booktitle = {Proceedings of the 21st International Conference on Enterprise Information Systems (ICEIS); Heraklion, Crete, Greece, May 3-5, 2019},
   editor = {Joaquim Filipe and Michal Smialek and Alexander Brodsky and Slimane Hammoudi},
   publisher = {SciTePress},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {415--422},
   type = {Conference Paper},
   month = may,
   year = {2019},
   isbn = {978-989-758-372-8},
   doi = {10.5220/0007725304150422},
   keywords = {Model Management; Machine Learning; Analytics Process},
   language = {English},
   cr-category = {I.2 Artificial Intelligence},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The management of machine learning models is an extremely challenging task. Hundreds of prototypical models are being built and just a few are mature enough to be deployed into operational enterprise information systems. The lifecycle of a model includes an experimental phase in which a model is planned, built and tested. After that, the model enters the operational phase that includes deploying, using, and retiring it. The experimental phase is well known through established process models like CRISP-DM or KDD. However, these models do not detail on the interaction between the experimental and the operational phase of machine learning models. In this paper, we provide a new process model to show the interaction points of the experimental and operational phase of a machine learning model. For each step of our process, we discuss according functions which are relevant to managing machine learning models.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-10&engl=1}
}
@inproceedings{INPROC-2019-09,
   author = {Christoph Stach and Bernhard Mitschang},
   title = {{ECHOES: A Fail-safe, Conflict Handling, and Scalable Data Management Mechanism for the Internet of Things}},
   booktitle = {Proceedings of the 23rd European Conference on Advances in Databases and Information Systems: ADBIS '19; Bled, Slovenia, September 8-11, 2019},
   editor = {Tatjana Welzer and Johann Eder and Vili Podgorelec and Aida Kamisalic Latific},
   publisher = {Springer},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Computer Science (LNCS)},
   pages = {1--16},
   type = {Conference Paper},
   month = sep,
   year = {2019},
   keywords = {Internet of Things; Data Exchange; Synchronization Protocol},
   language = {English},
   cr-category = {H.2.7 Database Administration,     H.2.4 Database Management Systems},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The Internet of Things (IoT) and Smart Services are becoming increasingly popular. Such services adapt to a user's needs by using sensors to detect the current situation. Yet, an IoT service has to capture its required data by itself, even if another service has already captured it before. There is no data exchange mechanism adapted to the IoT which enables sharing of sensor data among services and across devices. Therefore, we introduce a data management mechanism for the IoT. Due to its applied state-based synchronization protocol called ECHOES, it is fail-safe in case of connection failures, it detects and handles data conflicts, it is geared towards devices with limited resources, and it is highly scalable. We embed ECHOES into a data provisioning infrastructure, namely the Privacy Management Platform and the Secure Data Container. Evaluation results verify the practicability of our approach.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-09&engl=1}
}
@inproceedings{INPROC-2019-08,
   author = {Cornelia Kiefer and Peter Reimann and Bernhard Mitschang},
   title = {{A Hybrid Information Extraction Approach Exploiting Structured Data Within a Text Mining Process}},
   booktitle = {18. Fachtagung des GI-Fachbereichs ,,Datenbanken und Informationssysteme (DBIS), 4.-8. M{\"a}rz 2019, Rostock, Germany, Proceedings.},
   editor = {Torsten Grust and Felix Naumann and Alexander B{\"o}hm and Wolfgang Lehner and Theo H{\"a}rder and Erhard et al. Rahm},
   address = {Bonn},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {149--168},
   type = {Conference Paper},
   month = mar,
   year = {2019},
   keywords = {information extraction; clustering; text mining; free text fields},
   language = {English},
   cr-category = {I.2.7 Natural Language Processing},
   ee = {https://doi.org/10.18420/btw2019-10},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Many data sets encompass structured data fields with embedded free text fields. The text fields allow customers and workers to input information which cannot be encoded in structured fields. Several approaches use structured and unstructured data in isolated analyses. The result of isolated mining of structured data fields misses crucial information encoded in free text. The result of isolated text mining often mainly repeats information already available from structured data. The actual information gain of isolated text mining is thus limited. The main drawback of both isolated approaches is that they may miss crucial information. The hybrid information extraction approach suggested in this paper addresses this issue. Instead of extracting information that in large parts was already available beforehand, it extracts new, valuable information from free texts. Our solution exploits results of analyzing structured data within the text mining process, i.e., structured information guides and improves the information extraction process on textual data. Our main contributions comprise the description of the concept of hybrid information extraction as well as a prototypical implementation and an evaluation with two real-world data sets from aftersales and production with English and German free text fields.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-08&engl=1}
}
@inproceedings{INPROC-2019-07,
   author = {Cornelia Kiefer},
   title = {{Quality Indicators for Text Data}},
   booktitle = {18. Fachtagung des GI-Fachbereichs ,,Datenbanken und Informationssysteme (DBIS), 4.-8. M{\"a}rz 2019, Rostock, Germany, Workshopband.},
   editor = {Holger Meyer and Norbert Ritter and Andreas Thor and Daniela Nicklas and Andreas Heuer and Meike Klettke},
   address = {Bonn},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Dagstuhl Reports},
   pages = {145--154},
   type = {Conference Paper},
   month = mar,
   year = {2019},
   keywords = {data quality; text data quality; text mining; text analysis; quality indicators for text data},
   language = {English},
   cr-category = {I.2.7 Natural Language Processing},
   ee = {https://doi.org/10.18420/btw2019-ws-15},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Textual data sets vary in terms of quality. They have different characteristics such as the average sentence length or the amount of spelling mistakes and abbreviations. These text characteristics have influence on the quality of text mining results. They may be measured automatically by means of quality indicators. We present indicators, which we implemented based on natural language processing libraries such as Stanford CoreNLP and NLTK. We discuss design decisions in the implementation of exemplary indicators and provide all indicators on GitHub. In the evaluation, we investigate freetexts from production, news, prose, tweets and chat data and show that the suggested indicators predict the quality of two text mining modules.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-07&engl=1}
}
@inproceedings{INPROC-2019-06,
   author = {Christoph Stach and Frank Steimle and Clementine Gritti and Bernhard Mitschang},
   title = {{PSSST! The Privacy System for Smart Service Platforms: An Enabler for Confidable Smart Environments}},
   booktitle = {Proceedings of the 4th International Conference on Internet of Things, Big Data and Security (IoTBDS '19)},
   publisher = {SciTePress},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--12},
   type = {Conference Paper},
   month = may,
   year = {2019},
   keywords = {Privacy; Access Control; Internet of Things; Smart Service Platform; Sensors; Actuators; Stream Processing},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     D.4.6 Operating Systems Security and Protection},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The Internet of Things and its applications are becoming increasingly popular. Especially Smart Service Platforms like Alexa are in high demand. Such a platform retrieves data from sensors, processes them in a back-end, and controls actuators in accordance with the results. Thereby, all aspects of our everyday life can be managed. In this paper, we reveal the downsides of this technology by identifying its privacy threats based on a real-world application. Our studies show that current privacy systems do not tackle these issues adequately. Therefore, we introduce PSSST!, a user-friendly and comprehensive privacy system for Smart Service Platforms limiting the amount of disclosed private information while maximizing the quality of service at the same time.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-06&engl=1}
}
@inproceedings{INPROC-2019-03,
   author = {Christoph Stach and Corinna Giebler and Simone Schmidt},
   title = {{Zuverl{\"a}ssige Versp{\"a}tungsvorhersagen mithilfe von TAROT}},
   booktitle = {Tagungsband der 18. GI-Fachtagung Datenbanksysteme f{\"u}r Business, Technologie und Web},
   publisher = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {1--4},
   type = {Demonstration},
   month = mar,
   year = {2019},
   keywords = {Versp{\"a}tungsvorhersage; {\"O}PNV; deskriptive Analyse; pr{\"a}diktive Analyse; Concept Drift},
   language = {German},
   cr-category = {H.2.8 Database Applications,     H.2.4 Database Management Systems},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Bei der Einhaltung von Schadstoffwerten nehmen {\"o}ffentliche Verkehrsmittel eine immer entscheidendere Rolle ein. Daher wird vermehrt darauf geachtet, deren Attraktivit{\"a}t zu erh{\"o}hen. Ein wichtiger Punkt hierbei ist die Vorhersagegenauigkeit von Versp{\"a}tungen zu verbessern, damit Fahrg{\"a}ste entsprechend planen k{\"o}nnen. Die aktuell angewandten Ans{\"a}tze sind h{\"a}ufig ungenau, da sie die zur Verf{\"u}gung stehenden Daten nicht ausreichend nutzen. In diesem Beitrag stellen wir daher mit TAROT ein System vor, das mittels pr{\"a}diktiver Analysen die Vorhersagegenauigkeit von Versp{\"a}tungen verbessert, indem es in den Modellen Versp{\"a}tungsfortpflanzungen ber{\"u}cksichtigt. Dar{\"u}ber hinaus ist es in der Lage, im Fall einer St{\"o}rung augenblicklich auf ein besseres Vorhersagemodell umzusteigen und auf sowohl schleichende als auch abrupte Ver{\"a}nderungen automatisch zu reagieren. Die Vorteile dieser Eigenschaften lassen sich in unserem TAROT-Demonstrator anhand von vier repr{\"a}sentativen Anwendungsszenarien zeigen. Auch wenn sich die gezeigten Szenarien alle auf die Versp{\"a}tungsvorhersage von S-Bahnen beziehen, lassen sich die Konzepte von TAROT auch auf viele andere Anwendungsbereiche (z.B. zur Bestimmung von Produktionszeiten in der Industrie 4.0) anwenden.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-03&engl=1}
}
@inproceedings {INPROC-2019-02,
   author = {Christoph Stach and Frank Steimle},
   title = {{Recommender-based Privacy Requirements Elicitation - EPICUREAN: An Approach to Simplify Privacy Settings in IoT Applications with Respect to the GDPR}},
   booktitle = {Proceedings of the 34th ACM/SIGAPP Symposium On Applied Computing (SAC)},
   publisher = {ACM Press},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--8},
   type = {Conference Paper},
   month = apr,
   year = {2019},
   keywords = {privacy requirements elicitation; recommender system; knowledge modeling; clustering; association rules; privacy system; IoT; eHealth},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     J.4 Social and Behavioral Sciences,     H.3.3 Information Search and Retrieval},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Due to the Internet of Things (IoT), a giant leap towards a quantified self is made, i.e., more and more aspects of our lives are being captured, processed, and analyzed. This has many positive implications, e.g., Smart Health services help to relieve patients as well as physicians and reduce treatment costs. However, the price for such services is the disclosure of a lot of private data. For this reason, Smart Health services were particularly considered by the European General Data Protection Regulation (GDPR): a data subject's explicit consent is required when such a service processes his or her data. However, the elicitation of privacy requirements is a shortcoming in most IoT privacy systems. Either the user is overwhelmed by too many options or s/he is not sufficiently involved in the decision process. For this reason, we introduce EPICUREAN, a recommender-based privacy requirements elicitation approach. EPICUREAN uses modeling and data mining techniques to determine and recommend appropriate privacy settings to the user. The user is thus considerably supported but remains in full control over his or her private data.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-02&engl=1}
}
@article {ART-2019-24,
   author = {Daniel Del Gaudio and Pascal Hirmer},
   title = {{A lightweight messaging engine for decentralized data processing in the Internet of Things}},
   journal = {SICS Software-Intensive Cyber-Physical Systems},
   publisher = {Springer Berlin Heidelberg},
   pages = {39--48},
   type = {Article in Journal},
   month = aug,
   year = {2019},
   doi = {10.1007/s00450-019-00410-z},
   language = {English},
   cr-category = {C.2.4 Distributed Systems},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Today, Internet of Things applications are available throughout many different domains (manufacturing, health, cities, homes), enabling a high degree of automation to ease people's lives. For example, automated heating systems in a smart home can lead to reduced costs and an increased comfort for the residents. In the IoT, situations can be detected through interpretation of data produced by heterogeneous sensors, which typically lead to an invocation of actuators. In such applications, sensor data is usually streamed to a central instance for processing. However, especially in time-critical applications, this is not feasible, since high latency is an issue. To cope with this problem, in this paper, we introduce an approach for decentralized data processing in the IoT. This leads to decreased latency as well as a reduction of costs.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-24&engl=1}
}
@article {ART-2019-22,
   author = {Dimitri Petrik and Mathias Mormul and Peter Reimann},
   title = {{Anforderungen f{\"u}r Zeitreihendatenbanken in der industriellen Edge}},
   journal = {HMD Praxis der Wirtschaftsinformatik},
   publisher = {Springer-Verlag},
   volume = {56},
   pages = {1282--1308},
   type = {Article in Journal},
   month = oct,
   year = {2019},
   doi = {10.1365/s40702-019-00568-9},
   keywords = {Time Series Data; Time Series Database; Industrial IoT; Edge Computing; Defining Requirements; InfluxDB},
   language = {German},
   cr-category = {E.0 Data General},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Das industrielle Internet der Dinge (iIoT) integriert Informations- und Kommunikationstechnologien in die industriellen Prozesse und erweitert sie durch Echtzeit-Datenanalyse. Eine bedeutende Menge an Daten, die in der industriellen Fertigung generiert werden, sind sensorbasierte Zeitreihendaten, die in regelm{\"a}{\ss}igen Abst{\"a}nden generiert werden und zus{\"a}tzlich zum Sensorwert einen Zeitstempel enthalten. Spezielle Zeitreihen-Datenbanken (TSDB) sind daf{\"u}r ausgelegt, die Zeitreihendaten effizienter zu speichern. Wenn TSDBs in der N{\"a}he der Maschine (in der industriellen Edge) eingesetzt werden, sind Maschinendaten zur {\"U}berwachung zeitkritischer Prozesse aufgrund der niedrigen Latenz schnell verf{\"u}gbar, was die erforderliche Zeit f{\"u}r die Datenverarbeitung reduziert. Bisherige Untersuchungen zu TSDBs sind bei der Auswahl f{\"u}r den Einsatz in der industriellen Edge nur begrenzt hilfreich. Die meisten verf{\"u}gbaren Benchmarks von TSDBs sind performanceorientiert und ber{\"u}cksichtigen nicht die Einschr{\"a}nkungen der industriellen Edge. Wir adressieren diese L{\"u}cke und identifizieren die funktionalen Kriterien f{\"u}r den Einsatz von TSDBs im maschinennahen Umfeld und bilden somit einen qualitativen Anforderungskatalog. Des Weiteren zeigen wir am Beispiel von InfluxDB, wie dieser Katalog verwendet werden kann, mit dem Ziel die Auswahl einer geeigneten TSDB f{\"u}r Sensordaten in der Edge zu unterst{\"u}tzen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-22&engl=1}
}
@article {ART-2019-21,
   author = {Christoph Stach},
   title = {{Datenschutzkonzepte f{\"u}r Zeitreihendaten: Bewertung von qualit{\"a}tsbewahrenden Transformationsoperatoren zum Schutz privater Datenmuster in IoT-Anwendungen}},
   journal = {Datenschutz und Datensicherheit - DuD},
   address = {Wiesbaden},
   publisher = {Springer Fachmedien},
   volume = {43},
   number = {12},
   pages = {753--759},
   type = {Article in Journal},
   month = dec,
   year = {2019},
   issn = {1614-0702},
   doi = {10.1007/s11623-019-1201-8},
   keywords = {Datenschutz; Zeitreihendaten; IoT; DSGVO},
   language = {German},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     G.1.10 Numerical Analysis Applications},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Obwohl das Internet der Dinge (IoT) die Voraussetzung f{\"u}r smarte Anwendungen schafft, die signifikante Vorteile gegen{\"u}ber traditionellen Anwendungen bieten, stellt die zunehmende Verbreitung von IoT-f{\"a}higen Ger{\"a}ten auch eine immense Gef{\"a}hrdung der Privatheit dar. IoT-Anwendungen sammeln eine Vielzahl an Daten und senden diese zur Verarbeitung an ein Back-End. Hierbei werden umfangreiche Erkenntnisse {\"u}ber den Nutzer gewonnen. Erst dieses Wissen erm{\"o}glicht die Servicevielfalt, die IoT-Anwendungen bieten. Der Nutzer muss daher einen Kompromiss aus Servicequalit{\"a}t und Datenschutz treffen. Heutige Datenschutzans{\"a}tze ber{\"u}cksichtigen dies unzureichend und sind dadurch h{\"a}ufig zu restriktiv. Daher stellen wir neue Konzepte zum Schutz privater Daten f{\"u}r das IoT vor. Diese ber{\"u}cksichtigen die speziellen Eigenschaften von IoT-Zeitreihendaten. So kann die Privatheit des Nutzers gew{\"a}hrleistet werden, ohne die Servicequalit{\"a}t unn{\"o}tig einzuschr{\"a}nken.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-21&engl=1}
}
@article {ART-2019-12,
   author = {Mathias Mormul and Pascal Hirmer and Matthias Wieland and Bernhard Mitschang},
   title = {{Distributed Situation Recognition in Industry 4.0}},
   journal = {International Journal On Advances in Intelligent Systems},
   publisher = {IARIA},
   volume = {12},
   number = {1},
   pages = {39--49},
   type = {Article in Journal},
   month = aug,
   year = {2019},
   issn = {1942-2679},
   keywords = {Industry 4.0; Edge Computing; Situation Recognition; Distribution Pattern},
   language = {English},
   cr-category = {E.0 Data General},
   ee = {https://www.iariajournals.org/intelligent_systems/intsys_v12_n12_2019_paged.pdf},
   contact = {mathias.mormul@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {In recent years, advances in the Internet of Things led to new approaches and applications, for example, in the domains Smart Factories or Smart Cities. However, with the advantages such applications bring, also new challenges arise. One of these challenges is the recognition of situations, e.g., machine failures in Smart Factories. Especially in the domain of industrial manufacturing, several requirements have to be met in order to deliver a reliable and efficient situation recognition. One of these requirements is distribution in order to achieve high efficiency. In this article, we present a layered modeling approach to enable distributed situation recognition. These layers include the modeling, the deployment, and the execution of the situation recognition. Furthermore, we enable tool support to decrease the complexity for domain users.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-12&engl=1}
}
@article {ART-2019-11,
   author = {Manuel Fritz and Osama Muazzen and Michael Behringer and Holger Schwarz},
   title = {{ASAP-DM: a framework for automatic selection of analytic platforms for data mining}},
   journal = {Software-Intensive Cyber-Physical Systems},
   publisher = {Springer Berlin Heidelberg},
   pages = {1--13},
   type = {Article in Journal},
   month = aug,
   year = {2019},
   issn = {2524-8510},
   eissn = {2524-8529},
   doi = {10.1007/s00450-019-00408-7},
   keywords = {Data mining; Analytic platform; Platform selection},
   language = {English},
   cr-category = {E.0 Data General,     H.2.8 Database Applications,     H.3.3 Information Search and Retrieval},
   contact = {manuel.fritz@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The plethora of analytic platforms escalates the difficulty of selecting the most appropriate analytic platform that fits the needed data mining task, the dataset as well as additional user-defined criteria. Especially analysts, who are rather focused on the analytics domain, experience difficulties to keep up with the latest developments. In this work, we introduce the ASAP-DM framework, which enables analysts to seamlessly use several platforms, whereas programmers can easily add several platforms to the framework. Furthermore, we investigate how to predict a platform based on specific criteria, such as lowest runtime or resource consumption during the execution of a data mining task. We formulate this task as an optimization problem, which can be solved by today's classification algorithms. We evaluate the proposed framework on several analytic platforms such as Spark, Mahout, and WEKA along with several data mining algorithms for classification, clustering, and association rule discovery. Our experiments unveil that the automatic selection process can save up to 99.71\% of the execution time due to automatically choosing a faster platform.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-11&engl=1}
}
@article {ART-2019-10,
   author = {Cornelia Kiefer and Peter Reimann and Bernhard Mitschang},
   title = {{QUALM: Ganzheitliche Messung und Verbesserung der Datenqualit{\"a}t in der Textanalyse}},
   journal = {Datenbank-Spektrum},
   publisher = {Springer Verlag},
   pages = {1--12},
   type = {Article in Journal},
   month = jun,
   year = {2019},
   doi = {10.1007/s13222-019-00318-7},
   keywords = {Datenqualit{\"a}t; Textanalyse; Text Mining; Trainingsdaten; Semantische Ressourcen},
   language = {German},
   cr-category = {H.3 Information Storage and Retrieval},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Bestehende Ans{\"a}tze zur Messung und Verbesserung der Qualit{\"a}t von Textdaten in der Textanalyse bringen drei gro{\ss}e Nachteile mit sich. Evaluationsmetriken wie zum Beispiel Accuracy messen die Qualit{\"a}t zwar verl{\"a}sslich, sie (1) sind jedoch auf aufw{\"a}ndig h{\"a}ndisch zu erstellende Goldannotationen angewiesen und (2) geben keine Ansatzpunkte f{\"u}r die Verbesserung der Qualit{\"a}t. Erste dom{\"a}nenspezifische Datenqualit{\"a}tsmethoden f{\"u}r unstrukturierte Textdaten kommen zwar ohne Goldannotationen aus und geben Ansatzpunkte zur Verbesserung der Datenqualit{\"a}t. Diese Methoden wurden jedoch nur f{\"u}r begrenzte Anwendungsgebiete entwickelt und (3) ber{\"u}cksichtigen deshalb nicht die Spezifika vieler Analysetools in Textanalyseprozessen. In dieser Arbeit pr{\"a}sentieren wir hierzu das QUALM-Konzept zum qualitativ hochwertigen Mining von Textdaten (QUALity Mining), das die drei o.g. Nachteile adressiert. Das Ziel von QUALM ist es, die Qualit{\"a}t der Analyseergebnisse, z. B. bzgl. der Accuracy einer Textklassifikation, auf Basis einer Messung und Verbesserung der Datenqualit{\"a}t zu erh{\"o}hen. QUALM bietet hierzu eine Menge an QUALM-Datenqualit{\"a}tsmethoden. QUALM-Indikatoren erfassen die Datenqualit{\"a}t ganzheitlich auf Basis der Passung zwischen den Eingabedaten und den Spezifika der Analysetools, wie den verwendeten Features, Trainingsdaten und semantischen Ressourcen (wie zum Beispiel W{\"o}rterb{\"u}chern oder Taxonomien). Zu jedem Indikator geh{\"o}rt ein passender Modifikator, mit dem sowohl die Daten als auch die Spezifika der Analysetools ver{\"a}ndert werden k{\"o}nnen, um die Datenqualit{\"a}t zu erh{\"o}hen. In einer ersten Evaluation von QUALM zeigen wir f{\"u}r konkrete Analysetools und Datens{\"a}tze, dass die Anwendung der QUALM-Datenqualit{\"a}tsmethoden auch mit einer Erh{\"o}hung der Qualit{\"a}t der Analyseergebnisse im Sinne der Evaluationsmetrik Accuracy einhergeht. 
Die Passung zwischen Eingabedaten und Spezifika der Analysetools wird hierzu mit konkreten QUALM-Modifikatoren erh{\"o}ht, die zum Beispiel Abk{\"u}rzungen aufl{\"o}sen oder automatisch auf Basis von Text{\"a}hnlichkeitsmetriken passende Trainingsdaten vorschlagen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-10&engl=1}
}
@article {ART-2019-09,
   author = {Karoline Saatkamp and Uwe Breitenb{\"u}cher and Oliver Kopp and Frank Leymann},
   title = {{Method, formalization, and algorithms to split topology models for distributed cloud application deployments}},
   journal = {Computing},
   publisher = {Springer Wien},
   pages = {1--21},
   type = {Article in Journal},
   month = apr,
   year = {2019},
   doi = {10.1007/s00607-019-00721-8},
   keywords = {Application deployment; Distribution; Splitting; Multi-cloud; TOSCA},
   language = {English},
   cr-category = {D.2.2 Software Engineering Design Tools and Techniques},
   ee = {https://link.springer.com/article/10.1007/s00607-019-00721-8},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems;     University of Stuttgart, Institute of Architecture of Application Systems},
   abstract = {For automating the deployment of applications in cloud environments, a variety of technologies have been developed in recent years. These technologies enable to specify the desired deployment in the form of deployment models that can be automatically processed by a provisioning engine. However, the deployment across several clouds increases the complexity of the provisioning. Using one deployment model with a single provisioning engine, which orchestrates the deployment across the clouds, forces the providers to expose low-level APIs to ensure the accessibility from outside. In this paper, we present an extended version of the split and match method to facilitate the division of deployment models to multiple models which can be deployed by each provider separately. The goal of this approach is to reduce the information and APIs which have to be exposed to the outside. We present a formalization and algorithms to automate the method. Moreover, we validate the practical feasibility by a prototype based on the TOSCA standard and the OpenTOSCA ecosystem.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-09&engl=1}
}
@article {ART-2019-07,
   author = {Manuel Fritz and Michael Behringer and Holger Schwarz},
   title = {{Quality-driven early stopping for explorative cluster analysis for big data}},
   journal = {Software-Intensive Cyber-Physical Systems},
   publisher = {Springer Berlin Heidelberg},
   pages = {1--12},
   type = {Article in Journal},
   month = feb,
   year = {2019},
   issn = {2524-8510},
   eissn = {2524-8529},
   doi = {10.1007/s00450-019-00401-0},
   keywords = {Clustering; Big Data; Early Stop; Convergence; Regression},
   language = {English},
   cr-category = {E.0 Data General,     H.2.8 Database Applications,     H.3.3 Information Search and Retrieval},
   ee = {https://link.springer.com/article/10.1007/s00450-019-00401-0},
   contact = {manuel.fritz@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Data analysis has become a critical success factor for companies in all areas. Hence, it is necessary to quickly gain knowledge from available datasets, which is becoming especially challenging in times of big data. Typical data mining tasks like cluster analysis are very time consuming even if they run in highly parallel environments like Spark clusters. To support data scientists in explorative data analysis processes, we need techniques to make data mining tasks even more efficient. To this end, we introduce a novel approach to stop clustering algorithms as early as possible while still achieving an adequate quality of the detected clusters. Our approach exploits the iterative nature of many cluster algorithms and uses a metric to decide after which iteration the mining task should stop. We present experimental results based on a Spark cluster using multiple huge datasets. The experiments unveil that our approach is able to accelerate the clustering up to a factor of more than 800 by obliterating many iterations which provide only little gain in quality. This way, we are able to find a good balance between the time required for data analysis and quality of the analysis results.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-07&engl=1}
}
@article {ART-2019-03,
   author = {Karoline Saatkamp and Uwe Breitenb{\"u}cher and Oliver Kopp and Frank Leymann},
   title = {{An approach to automatically detect problems in restructured deployment models based on formalizing architecture and design patterns}},
   journal = {SICS Software-Intensive Cyber-Physical Systems},
   publisher = {Springer Berlin Heidelberg},
   pages = {1--13},
   type = {Article in Journal},
   month = feb,
   year = {2019},
   doi = {10.1007/s00450-019-00397-7},
   keywords = {Topology-based deployment model; Patterns; Problem detection; TOSCA; Logic programming; Prolog},
   language = {English},
   cr-category = {C.2.4 Distributed Systems,     D.2.2 Software Engineering Design Tools and Techniques,     D.2.12 Software Engineering Interoperability,     K.6 Management of Computing and Information Systems},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems;     University of Stuttgart, Institute of Architecture of Application Systems},
   abstract = {For the automated deployment of applications, technologies exist which can process topology-based deployment models that describes the application's structure with its components and their relations. The topology-based deployment model of an application can be adapted for the deployment in different environments. However, the structural changes can lead to problems, which had not existed before and prevent a functional deployment. This includes security issues, communication restrictions, or incompatibilities. For example, a formerly over the internal network established insecure connection leads to security problems when using the public network after the adaptation. In order to solve problems in adapted deployment models, first the problems have to be detected. Unfortunately, detecting such problems is a highly non-trivial challenge that requires deep expertise about the involved technologies and the environment. In this paper, we present (i) an approach for detecting problems in deployment models using architecture and design patterns and (ii) the automation of the detection process by formalizing the problem a pattern solves in a certain context. We validate the practical feasibility of our approach by a prototypical implementation for the automated problem detection in TOSCA topologies.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-03&engl=1}
}
@inbook {INBOOK-2019-03,
   author = {Christoph Stach and Frank Steimle and Bernhard Mitschang},
   title = {{How to Realize Device Interoperability and Information Security in mHealth Applications}},
   booktitle = {Biomedical Engineering Systems and Technologies},
   address = {Cham},
   publisher = {Springer Nature},
   series = {Communications in Computer and Information Science},
   volume = {1024},
   pages = {213--237},
   type = {Article in Book},
   month = aug,
   year = {2019},
   isbn = {978-3-030-29195-2},
   doi = {10.1007/978-3-030-29196-9_12},
   keywords = {mHealth; Device interoperability; Information security; COPD},
   language = {English},
   cr-category = {H.5.0 Information Interfaces and Presentation General,     K.6.5 Security and Protection,     K.8 Personal Computing},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {More and more people suffer from chronic diseases such as the chronic obstructive pulmonary disease (COPD). This leads to very high treatment costs every year, as such patients require a periodic screening of their condition. However, many of these checks can be performed at home by the patients themselves. This enables physicians to focus on actual emergencies. Modern smart devices such as Smartphones contribute to the success of these telemedical approaches. So-called mHealth apps combine the usability and versatility of Smartphones with the high accuracy and reliability of medical devices for home use. However, patients often face the problem of how to connect medical devices to their Smartphones (the device interoperability problem). Moreover, many patients reject mHealth apps due to the lack of control over their sensitive health data (the information security problem). In our work, we discuss the usage of the Privacy Management Platform (PMP) to solve these problems. So, we describe the structure of mHealth apps and present a real-world COPD application. From this application we derive relevant functions of an mHealth app, in which device interoperability or information security is an issue. We extend the PMP in order to provide support for these recurring functions. Finally, we evaluate the utility of these PMP extensions based on the real-world mHealth app.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2019-03&engl=1}
}
@inbook {INBOOK-2019-02,
   author = {Christoph Stach and Bernhard Mitschang},
   title = {{Elicitation of Privacy Requirements for the Internet of Things Using ACCESSORS}},
   booktitle = {Information Systems Security and Privacy},
   address = {Cham},
   publisher = {Springer Nature},
   series = {Communications in Computer and Information Science},
   volume = {977},
   pages = {40--65},
   type = {Article in Book},
   month = jul,
   year = {2019},
   isbn = {978-3-030-25108-6},
   doi = {10.1007/978-3-030-25109-3_3},
   keywords = {Permission model; Data-centric; Derivation transparent; Fine-grained; Context-sensitive; Internet of Things; PMP; PATRON},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     D.4.6 Operating Systems Security and Protection},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Novel smart devices are equipped with various sensors to capture context data. The Internet of Things (IoT) connects these devices with each other in order to bring together data from various domains. Due to the IoT, new application areas come up continuously. For instance, the quality of life and living can be significantly improved by installing connected and remote-controlled devices in Smart Homes. Or the treatment of chronic diseases can be made more convenient for both, patients and physicians, by using Smart Health technologies. For this, however, a large amount of data has to be collected, shared, and combined. This gathered data provides detailed insights into the user of the devices. Therefore, privacy is a key issue for such IoT applications. As current privacy systems for mobile devices focus on a single device only, they cannot be applied to a distributed and highly interconnected environment as the IoT. Therefore, we determine the special requirements towards a permission models for the IoT. Based on this requirements specification, we introduce ACCESSORS, a data-centric permission model for the IoT and describe how to apply such a model to two promising privacy systems for the IoT, namely the Privacy Management Platform (PMP) and PATRON.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2019-02&engl=1}
}
@inbook {INBOOK-2019-01,
   author = {Christoph Stach},
   title = {{Fine-Grained Privacy Control for Fitness and Health Applications Using the Privacy Management Platform}},
   booktitle = {Information Systems Security and Privacy},
   address = {Cham},
   publisher = {Springer Nature},
   series = {Communications in Computer and Information Science},
   volume = {977},
   pages = {1--25},
   type = {Article in Book},
   month = jul,
   year = {2019},
   isbn = {978-3-030-25108-6},
   doi = {10.1007/978-3-030-25109-3_1},
   keywords = {Smartbands; Health and Fitness Applications; Privacy Concerns; Bluetooth; Internet; Privacy Policy Model; Privacy Management Platform},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     D.4.6 Operating Systems Security and Protection,     K.8 Personal Computing},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Due to the Internet of Things, novel types of sensors are integrated into everyday objects. A domain that benefits most is the fitness and health domain. With the advent of the so-called Smartbands - i.e., bracelets or watches with built-in sensors such as heart rate sensors, location sensors, or even glucose meters - novel fitness and health application are made possible. That way a quantified self can be created. Despite all the advantages that such applications entail, new privacy concerns arise. These applications collect and process sensitive health data. Users are concerned by reports about privacy violations. These violations are enabled by inherent security vulnerabilities and deficiencies in the privacy systems of mobile platforms. As none of the existing privacy approaches is designed for the novel challenges arising from Smartband applications, we discuss, how the Privacy Policy Model (PPM), a fine-grained and modular expandable permission model, can be applied to this application area. This model is implemented in the Privacy Management Platform (PMP). Thus the outcomes of this work can be leveraged directly. Evaluation results underline the benefits of our work for Smartband applications.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2019-01&engl=1}
}
 
To the top of the page