Institut für Parallele und Verteilte Systeme (IPVS)

Publikationen

Eine Übersicht der Publikationen des Instituts für Parallele und Verteilte Systeme

Publikationen AS: Bibliographie 2020 BibTeX

 
@inproceedings {INPROC-2020-57,
   author = {Simon Dreher and Peter Reimann and Christoph Gr{\"o}ger},
   title = {{Application Fields and Research Gaps of Process Mining in Manufacturing Companies}},
   booktitle = {Proceedings of INFORMATIK 2020},
   editor = {Reussner, R. H. and Koziolek, A. and Heinrich, R.},
   publisher = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {621--634},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2020},
   keywords = {Process Mining; Application; Production; Manufacturing; SCOR; Literature Review},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {To survive in global competition with increasing cost pressure, manufacturing companies must continuously optimize their manufacturing-related processes. Thereby, process mining constitutes an important data-driven approach to gain a profound understanding of the actual processes and to identify optimization potentials by applying data mining and machine learning techniques on event data. However, there is little knowledge about the feasibility and usefulness of process mining specifically in manufacturing companies. Hence, this paper provides an overview of potential applications of process mining for the analysis of manufacturing-related processes. We conduct a systematic literature review, classify relevant articles according to the Supply-Chain-Operations-Reference-Model (SCOR-model), identify research gaps, such as domain-specific challenges regarding unstructured, cascaded and non-linear processes or heterogeneous data sources, and give practitioners inspiration which manufacturing-related processes can be analyzed by process mining techniques.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-57&engl=0}
}
@inproceedings{INPROC-2020-56,
  author      = {Christian Weber and Peter Reimann},
  title       = {{MMP - A Platform to Manage Machine Learning Models in Industry 4.0 Environments}},
  booktitle   = {Proceedings of the IEEE 24th International Enterprise Distributed Object Computing Workshop (EDOCW)},
  address     = {Eindhoven, The Netherlands},
  publisher   = {IEEE},
  institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
  type        = {Demonstration},
  month       = {Juli},
  year        = {2020},
  keywords    = {Model Management; Machine Learning; Collaborative Data Science},
  language    = {Englisch},
  cr-category = {H.3.4 Information Storage and Retrieval Systems and Software},
  department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
  abstract    = {In manufacturing environments, machine learning models are being built for several use cases, such as predictive maintenance and product quality control. In this context, the various manufacturing processes, machines, and product variants make it necessary to create and use lots of different machine learning models. This calls for a software system that is able to manage all these diverse machine learning models and associated metadata. However, current model management systems do not associate models with business and domain context to provide non-expert users with tailored functions for model search and discovery. Moreover, none of the existing systems provides a comprehensive overview of all models within an organization. In our demonstration, we present the MMP, our model management platform that addresses these issues. The MMP provides a model metadata extractor, a model registry, and a context manager to store model metadata in a central metadata store. On top of this, the MMP provides frontend components that offer the above-mentioned functionalities. In our demonstration, we show two scenarios for model management in Industry 4.0 environments that illustrate the novel functionalities of the MMP. We demonstrate to the audience how the platform and its metadata, linking models to their business and domain context, help non-expert users to search and discover models. Furthermore, we show how to use MMP's powerful visualizations for model reporting, such as a dashboard and a model landscape view.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-56&engl=0}
}
@inproceedings{INPROC-2020-55,
  author      = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
  title       = {{A Zone Reference Model for Enterprise-Grade Data Lake Management}},
  booktitle   = {Proceedings of the 24th IEEE Enterprise Computing Conference},
  publisher   = {IEEE},
  institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
  pages       = {57--66},
  type        = {Konferenz-Beitrag},
  month       = {Oktober},
  year        = {2020},
  keywords    = {Data Lake; Zones; Reference Model; Industry Case; Industry Experience},
  language    = {Englisch},
  cr-category = {H.4 Information Systems Applications},
  contact     = {Senden Sie eine E-Mail an corinna.giebler@ipvs.uni-stuttgart.de},
  department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
  abstract    = {Data lakes are on the rise as data platforms for any kind of analytics, from data exploration to machine learning. They achieve the required flexibility by storing heterogeneous data in their raw format, and by avoiding the need for pre-defined use cases. However, storing only raw data is inefficient, as for many applications, the same data processing has to be applied repeatedly. To foster the reuse of processing steps, literature proposes to store data in different degrees of processing in addition to their raw format. To this end, data lakes are typically structured in zones. There exists various zone models, but they are varied, vague, and no assessments are given. It is unclear which of these zone models is applicable in a practical data lake implementation in enterprises. In this work, we assess existing zone models using requirements derived from multiple representative data analytics use cases of a real-world industry case. We identify the shortcomings of existing work and develop a zone reference model for enterprise-grade data lake management in a detailed manner. We assess the reference model's applicability through a prototypical implementation for a real-world enterprise data lake use case. This assessment shows that the zone reference model meets the requirements relevant in practice and is ready for industry use.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-55&engl=0}
}
@inproceedings {INPROC-2020-54,
   author = {Manuel Fritz and Dennis Tschechlov and Holger Schwarz},
   title = {{Learning from Past Observations: Meta-Learning for Efficient Clustering Analyses}},
   booktitle = {Proceedings of 22nd Big Data Analytics and Knowledge Discovery (DaWaK), 2020},
   editor = {Min Song and Il-Yeol Song and Gabriele Kotsis and A Min Tjoa and Ismail Khalil},
   publisher = {Springer, Cham},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Computer Science},
   volume = {12393},
   pages = {364--379},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2020},
   isbn = {978-3-030-59065-9},
   doi = {10.1007/978-3-030-59065-9_28},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Many clustering algorithms require the number of clusters as input parameter prior to execution. Since the ``best'' number of clusters is most often unknown in advance, analysts typically execute clustering algorithms multiple times with varying parameters and subsequently choose the most promising result. Several methods for an automated estimation of suitable parameters have been proposed. Similar to the procedure of an analyst, these estimation methods draw on repetitive executions of a clustering algorithm with varying parameters. However, when working with voluminous datasets, each single execution tends to be very time-consuming. Especially in today's Big Data era, such a repetitive execution of a clustering algorithm is not feasible for an efficient exploration. We propose a novel and efficient approach to accelerate estimations for the number of clusters in datasets. Our approach relies on the idea of meta-learning and terminates each execution of the clustering algorithm as soon as an expected qualitative demand is met. We show that this new approach is generally applicable, i.e., it can be used with existing estimation methods. Our comprehensive evaluation reveals that our approach is able to speed up the estimation of the number of clusters by an order of magnitude, while still achieving accurate estimates.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-54&engl=0}
}
@inproceedings {INPROC-2020-53,
   author = {Manuel Fritz and Michael Behringer and Holger Schwarz},
   title = {{LOG-Means: Efficiently Estimating the Number of Clusters in Large Datasets}},
   booktitle = {Proceedings of the 46th International Conference on Very Large Databases (VLDB)},
   editor = {Magdalena Balazinska and Xiaofang Zhou},
   publisher = {ACM Digital Library},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Proceedings of the VLDB Endowment},
   volume = {13 (12)},
   pages = {2118--2131},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2020},
   issn = {2150-8097},
   doi = {10.14778/3407790.3407813},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Clustering is a fundamental primitive in manifold applications. In order to achieve valuable results, parameters of the clustering algorithm, e.g., the number of clusters, have to be set appropriately, which is a tremendous pitfall. To this end, analysts rely on their domain knowledge in order to define parameter search spaces. While experienced analysts may be able to define a small search space, especially novice analysts often define rather large search spaces due to the lack of in-depth domain knowledge. These search spaces can be explored in different ways by estimation methods for the number of clusters. In the worst case, estimation methods perform an exhaustive search in the given search space, which leads to infeasible runtimes for large datasets and large search spaces. We propose LOG-Means, which is able to overcome these issues of existing methods. We show that LOG-Means provides estimates in sublinear time regarding the defined search space, thus being a strong fit for large datasets and large search spaces. In our comprehensive evaluation on an Apache Spark cluster, we compare LOG-Means to 13 existing estimation methods. The evaluation shows that LOG-Means significantly outperforms these methods in terms of runtime and accuracy. To the best of our knowledge, this is the most systematic comparison on large datasets and search spaces as of today.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-53&engl=0}
}
@inproceedings {INPROC-2020-52,
   author = {Daniel Del Gaudio and Maximilian Reichel and Pascal Hirmer},
   title = {{A Life Cycle Method for Device Management in Dynamic IoT Environments}},
   booktitle = {Proceedings of the 5th International Conference on Internet of Things, Big Data and Security - Volume 1: IoTBDS},
   publisher = {SciTePress Digital Library},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {46--56},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2020},
   keywords = {Internet of Things; Discovery; Device Integration; Decentralization},
   language = {Englisch},
   cr-category = {C.2.4 Distributed Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In the Internet of Things, interconnected devices communicate with each other through standardized internet protocols to reach common goals. By doing so, they enable building complex, self-organizing applications, such as Smart Cities, or Smart Factories. Especially in large IoT environments, newly appearing devices as well as leaving or failing IoT devices are a great challenge. New devices need to be integrated into the application whereas failing devices need to be dealt with. In a Smart City, newly appearing actors, for example, smart phones or connected cars, appear and disappear all the time. Dealing with this dynamic is a great issue, especially when done automatically. Consequently, in this paper, we introduce A Life Cycle Method for Device Management in Dynamic IoT Environments. This method enables integrating newly appearing IoT devices into IoT applications and, furthermore, offers means to cope with failing devices. Our approach is evaluated through a system architecture and a corresponding prototypical implementation.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-52&engl=0}
}
@inproceedings {INPROC-2020-51,
   author = {Daniel Del Gaudio and Pascal Hirmer},
   title = {{Fulfilling the IoT Vision: Are We There Yet?}},
   booktitle = {Proceedings of the 5th International Conference on Internet of Things, Big Data and Security - Volume 1: IoTBDS},
   publisher = {SciTePress Digital Library},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {367--374},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2020},
   isbn = {978-989-758-426-8},
   keywords = {Internet of Things; Decentralized; Autonomous; Dynamic; Smart},
   language = {Englisch},
   cr-category = {C.2 Computer-Communication Networks},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The vision of the Internet of Things is enabling self-controlled and decentralized environments, in which hardware devices, equipped with sensors and actuators communicate with each other through standardized internet protocols to reach common goals. The device-to-device communication should be decentralized and should not necessarily require human interaction. However, enabling such complex IoT applications, e.g., connected cars, is a big challenge, since many requirements need to be fulfilled. These requirements include, for example, security, privacy, timely data processing, uniform communication standards, or location-awareness. Based on an intensive literature review, in this overview paper, we define requirements for such environments and, in addition, we discuss whether they are fulfilled by state-of-the-art approaches or whether there still has to be work done in the future. We conclude this paper by illustrating research gaps that have to be filled in order to realize the IoT vision.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-51&engl=0}
}
@inproceedings {INPROC-2020-50,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{HANDLE - A Generic Metadata Model for Data Lakes}},
   booktitle = {Big Data Analytics and Knowledge Discovery: 22nd International Conference, DaWaK 2020, Bratislava, Slovakia, September 14--17, 2020, Proceedings},
   publisher = {Springer, Cham},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {73--88},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2020},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   internal-note = {NOTE(review): appears to duplicate INPROC-2020-45 (same title, authors, and pages, LNCS 12393); confirm and consider keeping only one entry},
   abstract = {The substantial increase in generated data induced the development of new concepts such as the data lake. A data lake is a large storage repository designed to enable flexible extraction of the data's value. A key aspect of exploiting data value in data lakes is the collection and management of metadata. To store and handle the metadata, a generic metadata model is required that can reflect metadata of any potential metadata management use case, e.g., data versioning or data lineage. However, an evaluation of existent metadata models yields that none so far are sufficiently generic. In this work, we present HANDLE, a generic metadata model for data lakes, which supports the flexible integration of metadata, data lake zones, metadata on various granular levels, and any metadata categorization. With these capabilities HANDLE enables comprehensive metadata management in data lakes. We show HANDLE's feasibility through the application to an exemplary access-use-case and a prototypical implementation. A comparison with existent models yields that HANDLE can reflect the same information and provides additional capabilities needed for metadata management in data lakes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-50&engl=0}
}
@inproceedings {INPROC-2020-48,
   author = {Dennis Przytarski and Christoph Stach and Cl{\'e}mentine Gritti and Bernhard Mitschang},
   title = {{A Blueprint for a Trustworthy Health Data Platform Encompassing IoT and Blockchain Technologies}},
   booktitle = {Proceedings of the ISCA 29th International Conference on Software Engineering and Data Engineering (Las Vegas, October 2020)},
   publisher = {ISCA in Cooperation with IEEE Computer Society},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--10},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2020},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration,     K.6.5 Security and Protection},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {eHealth provides great relief for patients and physicians. This means, patients autonomously monitor their condition via IoT medical devices and make these data available to physicians for analyses. This requires a data platform that takes care of data acquisition, management, and provisioning. As health data are highly sensitive, there are major concerns regarding data security with respect to confidentiality, integrity, and authenticity. To this end, we present a blueprint for constructing a trustworthy health data platform called SEAL. It provides a lightweight attribute-based authentication mechanism for IoT devices to validate all involved data sources, there is a fine-grained data provisioning system to enable data provision according to actual requirements, and a verification procedure ensures that data cannot be manipulated.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-48&engl=0}
}
@inproceedings {INPROC-2020-45,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{HANDLE - A Generic Metadata Model for Data Lakes}},
   booktitle = {Big Data Analytics and Knowledge Discovery},
   editor = {Min Song and Il-Yeol Song and Gabriele Kotsis and A Min Tjoa and Ismail Khalil},
   publisher = {Springer Nature Switzerland AG},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Computer Science},
   volume = {12393},
   pages = {73--88},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2020},
   doi = {10.1007/978-3-030-59065-9_7},
   keywords = {Metadata management; Metadata model; Data lake},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2020-45/INPROC-2020-45.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The substantial increase in generated data induced the development of new concepts such as the data lake. A data lake is a large storage repository designed to enable flexible extraction of the data's value. A key aspect of exploiting data value in data lakes is the collection and management of metadata. To store and handle the metadata, a generic metadata model is required that can reflect metadata of any potential metadata management use case, e.g., data versioning or data lineage. However, an evaluation of existent metadata models yields that none so far are sufficiently generic. In this work, we present HANDLE, a generic metadata model for data lakes, which supports the flexible integration of metadata, data lake zones, metadata on various granular levels, and any metadata categorization. With these capabilities HANDLE enables comprehensive metadata management in data lakes. We show HANDLE's feasibility through the application to an exemplary access-use-case and a prototypical implementation. A comparison with existent models yields that HANDLE can reflect the same information and provides additional capabilities needed for metadata management in data lakes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-45&engl=0}
}
@inproceedings{INPROC-2020-39,
  author      = {Ana Cristina Franco da Silva and Pascal Hirmer and Jan Schneider and Seda Ulusal and Matheus Tavares Frigo},
  title       = {{MBP: Not just an IoT Platform}},
  booktitle   = {2020 IEEE International Conference on Pervasive Computing and Communications Workshops (PerCom Workshops)},
  publisher   = {Institute of Electrical and Electronics Engineers, Inc. (IEEE)},
  institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
  pages       = {1--3},
  type        = {Demonstration},
  month       = {August},
  year        = {2020},
  isbn        = {978-1-7281-4716-1},
  doi         = {10.1109/PerComWorkshops48775.2020.9156156},
  keywords    = {Internet of Things; Sensor Integration; IoT environments; Complex Event Processing},
  language    = {Englisch},
  cr-category = {H.0 Information Systems General},
  ee          = {https://ieeexplore.ieee.org/document/9156156},
  department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
  abstract    = {In this demonstration paper, we introduce the Multi-purpose Binding and Provisioning Platform (MBP), an open-source IoT platform developed for easy binding, provisioning, and management of IoT environments. Furthermore, the MBP enables the simple realization of IoT applications, such as heating, ventilation, air conditioning (HVAC) systems, by allowing users to create rules for the IoT environment, in a straightforward and event-condition-action fashion. The efficient and timely data processing of IoT environments are assured through underlying complex event processing technologies.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-39&engl=0}
}
@inproceedings {INPROC-2020-38,
   author = {Alejandro Villanueva Zacarias and Rachaa Ghabri and Peter Reimann},
   title = {{AD4ML: Axiomatic Design to Specify Machine Learning Solutions for Manufacturing}},
   booktitle = {Proceedings of the 21st International Conference on Information Reuse and Integration for Data Science},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2020},
   keywords = {manufacturing; machine-learning; design},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Machine learning is increasingly adopted in manufacturing use cases, e.g., for fault detection in a production line. Each new use case requires developing its own machine learning (ML) solution. A ML solution integrates different software components to read, process, and analyze all use case data, as well as to finally generate the output that domain experts need for their decision-making. The process to design a system specification for a ML solution is not straight-forward. It entails two types of complexity: (1) The technical complexity of selecting combinations of ML algorithms and software components that suit a use case; (2) the organizational complexity of integrating different requirements from a multidisciplinary team of, e.g., domain experts, data scientists, and IT specialists. In this paper, we propose several adaptations to Axiomatic Design in order to design ML solution specifications that handle these complexities. We call this Axiomatic Design for Machine Learning (AD4ML). We apply AD4ML to specify a ML solution for a fault detection use case and discuss to what extent our approach conquers the above-mentioned complexities. We also discuss how AD4ML facilitates the agile design of ML solutions.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-38&engl=0}
}
@inproceedings {INPROC-2020-37,
   author = {Mathias Mormul and Pascal Hirmer and Christoph Stach and Bernhard Mitschang},
   title = {{DEAR: Distributed Evaluation of Alerting Rules}},
   booktitle = {IEEE 13th International Conference on Cloud Computing (CLOUD)},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--1},
   type = {Konferenz-Beitrag},
   month = {Dezember},
   year = {2020},
   keywords = {cloud monitoring; agent-based; alerting},
   language = {Englisch},
   cr-category = {H.0 Information Systems General},
   contact = {mathias.mormul@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Cloud computing passed the hype cycle long ago and firmly established itself as a future technology since then. However, to utilize the cloud as cost-efficiently as possible, a continuous monitoring is key to prevent an over- or undercommissioning of resources. In large-scaled scenarios, several challenges for cloud monitoring, such as high network traffic volume, low accuracy of monitoring data, and high time-to-insight, require new approaches in IT Operations while considering administrative complexity. To handle these challenges, we present DEAR, the Distributed Evaluation of Alerting Rules. DEAR is a plugin for monitoring systems which automatically distributes alerting rules to the monitored resources to solve the trade-off between high accuracy and low network traffic volume without administrative overhead. We evaluate our approach against requirements of today's IT monitoring and compare it to conventional agent-based monitoring approaches.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-37&engl=0}
}
@inproceedings {INPROC-2020-32,
   author = {Vitali Hirsch and Peter Reimann and Bernhard Mitschang},
   title = {{Exploiting Domain Knowledge to Address Multi-Class Imbalance and a Heterogeneous Feature Space in Classification Tasks for Manufacturing Data}},
   booktitle = {Proceedings of the 46th International Conference on Very Large Databases (VLDB)},
   editor = {Magdalena Balazinska and Xiaofang Zhou},
   publisher = {ACM Digital Library},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Proceedings of the VLDB Endowment},
   volume = {13 (12)},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2020},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Classification techniques are increasingly adopted for quality control in manufacturing, e. g., to help domain experts identify the cause of quality issues of defective products. However, real-world data often imply a set of analytical challenges, which lead to a reduced classification performance. Major challenges are a high degree of multi-class imbalance within data and a heterogeneous feature space that arises from the variety of underlying products. This paper considers such a challenging use case in the area of End-of-Line testing, i. e., the final functional test of complex products. Existing solutions to classification or data pre-processing only address individual analytical challenges in isolation. We propose a novel classification system that explicitly addresses both challenges of multi-class imbalance and a heterogeneous feature space together. As main contribution, this system exploits domain knowledge to systematically prepare the training data. Based on an experimental evaluation on real-world data, we show that our classification system outperforms any other classification technique in terms of accuracy. Furthermore, we can reduce the amount of rework required to solve a quality issue of a product.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-32&engl=0}
}
@inproceedings{INPROC-2020-31,
  author      = {Yannick Wilhelm and Peter Reimann and Wolfgang Gauchel and Bernhard Mitschang},
  title       = {{Overview on Hybrid Approaches to Fault Detection and Diagnosis: Combining Data-driven, Physics-based and Knowledge-based Models}},
  booktitle   = {Procedia CIRP: Proceedings of the 14th CIRP Conference on Intelligent Computation in Manufacturing Engineering (CIRP ICME)},
  publisher   = {Elsevier BV},
  institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
  type        = {Konferenz-Beitrag},
  month       = {Juli},
  year        = {2020},
  keywords    = {Fault Detection; Fault Diagnosis; Hybrid Methods; Diagnostics and Maintenance; Knowledge-driven Methods; Machine Learning},
  language    = {Englisch},
  cr-category = {H.2.8 Database Applications,     I.2.1 Applications and Expert Systems},
  department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
  abstract    = {In this paper, we review hybrid approaches for fault detection and fault diagnosis (FDD) that combine data-driven analysis with physics-based and knowledge-based models to overcome a lack of data and to increase the FDD accuracy. We categorize these hybrid approaches according to the steps of an extended common workflow for FDD. This gives practitioners indications of which kind of hybrid FDD approach they can use in their application.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-31&engl=0}
}
@inproceedings {INPROC-2020-20,
   author = {Yannick Wilhelm and Ulf Schreier and Peter Reimann and Bernhard Mitschang and Holger Ziekow},
   title = {{Data Science Approaches to Quality Control in Manufacturing: A Review of Problems, Challenges and Architecture}},
   booktitle = {Springer Proceedings Series Communications in Computer and Information Science (CCIS)},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2020},
   keywords = {Data Science; Machine Learning; Quality Control; Challenges; Functional Architecture},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Manufacturing environments are characterized by non-stationary processes, constantly varying conditions, complex process interdependencies, and a high number of product variants. These and other aspects pose several challenges for common machine learning algorithms to achieve reliable and accurate predictions. This overview and vision paper provides a comprehensive list of common problems and challenges for data science approaches to quality control in manufacturing. We have derived these problems and challenges by inspecting three real-world use cases in the field of product quality control and via a comprehensive literature study. We furthermore associate the identified problems and challenges to individual layers and components of a functional setup, as it can be found in manufacturing environments today. Additionally, we extend and revise this functional setup and this way propose our vision of a future functional software architecture. This functional architecture represents a visionary blueprint for solutions that are able to address all challenges for data science approaches in manufacturing quality control.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-20&engl=0}
}
@inproceedings {INPROC-2020-19,
   author = {Christian Weber and Pascal Hirmer and Peter Reimann},
   title = {{A Model Management Platform for Industry 4.0 - Enabling Management of Machine Learning Models in Manufacturing Environments}},
   booktitle = {Proceedings of the 23rd International Conference on Business Information Systems (BIS)},
   editor = {Witold Abramowicz and Rainer Alt and Gary Klein and Adrian Paschke and Kurt Sandkuhl},
   publisher = {Springer International Publishing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Business Information Processing},
   type = {Konferenz-Beitrag},
   month = {November},
   year = {2020},
   issn = {1865-1348},
   keywords = {Model Management; Machine Learning; Metadata Tracking},
   language = {Englisch},
   cr-category = {H.3.4 Information Storage and Retrieval Systems and Software},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Industry 4.0 use cases such as predictive maintenance and product quality control make it necessary to create, use and maintain a multitude of different machine learning models. In this setting, model management systems help to organize models. However, concepts for model management systems currently focus on data scientists, but do not support non-expert users such as domain experts and business analysts. Thus, it is difficult for them to reuse existing models for their use cases. In this paper, we address these challenges and present an architecture, a metadata schema and a corresponding model management platform.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-19&engl=0}
}
@inproceedings {INPROC-2020-18,
   author = {Julian Ziegler and Peter Reimann and Florian Keller and Bernhard Mitschang},
   title = {{A Graph-based Approach to Manage CAE Data in a Data Lake}},
   booktitle = {Procedia CIRP: Proceedings of the 53rd CIRP Conference on Manufacturing Systems (CIRP CMS 2020)},
   publisher = {Elsevier},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2020},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Computer-aided engineering (CAE) applications generate vast quantities of heterogeneous data. Domain experts often fail to explore and analyze these data, because they are not integrated across different applications. Existing data management solutions are rather tailored to scientific applications. In our approach, we tackle this issue by combining a data lake solution with graph-based metadata management. This provides a holistic view of all CAE data and of the data-generating applications in one interconnected structure. Based on a prototypical implementation, we discuss how this eases the task of domain experts to explore and extract data for further analyses.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-18&engl=0}
}
@inproceedings {INPROC-2020-17,
   author = {Vitali Hirsch and Peter Reimann and Bernhard Mitschang},
   title = {{Incorporating Economic Aspects into Recommendation Ranking to Reduce Failure Costs}},
   booktitle = {Procedia CIRP: Proceedings of the 53rd CIRP Conference on Manufacturing Systems (CIRP CMS 2020)},
   publisher = {Elsevier},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2020},
   keywords = {decision support; predictive analytics; quality control; End-of-Line testing; classification; fault isolation; failure costs},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Machine learning approaches for manufacturing usually offer recommendation lists, e.g., to support humans in fault diagnosis. For instance, if a product does not pass the final check after the assembly, a recommendation list may contain likely faulty product components to be replaced. Thereby, the list ranks these components using their probabilities. However, these probabilities often differ marginally, while economic impacts, e.g., the costs for replacing components, differ significantly. We address this issue by proposing an approach that incorporates costs to re-rank a list. Our evaluation shows that this approach reduces fault-related costs when using recommendation lists to support human labor.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-17&engl=0}
}
@inproceedings {INPROC-2020-11,
   author = {Mathias Mormul and Pascal Hirmer and Christoph Stach and Bernhard Mitschang},
   title = {{Avoiding Vendor-Lockin in Cloud Monitoring using Generic Agent Templates}},
   booktitle = {Proceedings of the 23rd International Conference on Business Information Systems (BIS), 2020},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--1},
   type = {Konferenz-Beitrag},
   month = {Juni},
   year = {2020},
   keywords = {Vendor Lock-in; Cloud monitoring; Monitoring agents; Genericity},
   language = {Englisch},
   cr-category = {H.4.0 Information Systems Applications General},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Cloud computing passed the hype cycle long ago and firmly established itself as a future technology since then. However, to utilize the cloud optimally, and therefore, as cost-efficiently as possible, a continuous monitoring is key to prevent an over- or under-commissioning of resources. However, selecting a suitable monitoring solution is a challenging task. Monitoring agents that collect monitoring data are spread across the monitored IT environment. Therefore, the possibility of vendor lock-ins leads to a lack of flexibility when the cloud environment or the business needs change. To handle these challenges, we introduce \textit{generic agent templates} that are applicable to many monitoring systems and support a replacement of monitoring systems. Solution-specific technical details of monitoring agents are abstracted from and system administrators only need to model generic agents, which can be transformed into solution-specific monitoring agents. The transformation logic required for this process is provided by domain experts to not further burden system administrators. Furthermore, we introduce an agent lifecycle to support the system administrator with the management and deployment of generic agents.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-11&engl=0}
}
@inproceedings{INPROC-2020-08,
   author      = {Mathias Mormul and Christoph Stach},
   title       = {{A Context Model for Holistic Monitoring and Management of Complex IT Environments}},
   booktitle   = {Proceedings of the 2020 IEEE International Conference on Pervasive Computing and Communications Workshops (CoMoRea)},
   publisher   = {IEEE Computer Society},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages       = {1--1},
   type        = {Workshop-Beitrag},
   month       = {M{\"a}rz},
   year        = {2020},
   keywords    = {Context Model; IT Operations; AIOps; Monitoring},
   language    = {Englisch},
   cr-category = {C.0 Computer Systems Organization, General},
   contact     = {Senden Sie eine E-Mail an mathias.mormul@ipvs.uni-stuttgart.de},
   department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract    = {The increased usage of IoT, containerization, and multiple clouds not only changed the way IT works but also the way IT Operations, i. e., the monitoring and management of IT assets, works. Monitoring a complex IT environment leads to massive amounts of heterogeneous context data, usually spread across multiple data silos, which needs to be analyzed and acted upon autonomously. However, for a holistic overview of the IT environment, context data needs to be consolidated which leads to several problems. For scalable and automated processes, it is essential to know what context is required for a given monitored resource, where the context data are originating from, and how to access them across the data silos. Therefore, we introduce the Monitoring Resource Model for the holistic management of context data. We show what context is essential for the management of monitored resources and how it can be used for context reasoning. Furthermore, we propose a multi-layered framework for IT Operations with which we present the benefits of the Monitoring Resource Model.},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-08&engl=0}
}
@inproceedings {INPROC-2020-07,
   author = {Christoph Stach and Cl{\'e}mentine Gritti and Dennis Przytarski and Bernhard Mitschang},
   title = {{Trustworthy, Secure, and Privacy-aware Food Monitoring Enabled by Blockchains and the IoT}},
   booktitle = {Proceedings of the 18th Annual IEEE International Conference on Pervasive Computing and Communications Workshops (PerCom Workshops), 23-27 March, 2020, Austin, Texas, USA},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--4},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2020},
   keywords = {Attribute-based Credentials; Blockchain; Data Authentication; IoT; Privacy; Service Utility; Transparency; Trust},
   language = {Englisch},
   cr-category = {K.6.5 Security and Protection,     D.4.6 Operating Systems Security and Protection},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {A large number of food scandals (e.g., falsely declared meat or non-compliance with hygiene regulations) are causing considerable concern to consumers. Although Internet of Things (IoT) technologies are used in the food industry to monitor production (e.g., for tracing the origin of meat or monitoring cold chains), the gathered data are not used to provide full transparency to the consumer. To achieve this, however, three aspects must be considered: a) The origin of the data must be verifiable, i.e., it must be ensured that the data originate from calibrated sensors. b) The data must be stored tamper-resistant, immutable, and open to all consumers. c) Despite this openness, the privacy of affected data subjects (e.g., the carriers) must still be protected. To this end, we introduce the SHEEPDOG architecture that ``shepherds'' products from production to purchase to enable a trustworthy, secure, and privacy-aware food monitoring. In SHEEPDOG, attribute-based credentials ensure trustworthy data acquisition, blockchain technologies provide secure data storage, and fine-grained access control enables privacy-aware data provision.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-07&engl=0}
}
@inproceedings{INPROC-2020-06,
   author      = {Cornelia Kiefer and Peter Reimann and Bernhard Mitschang},
   title       = {{Prevent Low-Quality Analytics by Automatic Selection of the Best-Fitting Training Data}},
   booktitle   = {Proceedings of the 53rd Hawaii International Conference on System Sciences (HICSS)},
   address     = {Maui, Hawaii, USA},
   publisher   = {Online},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages       = {1036--1045},
   type        = {Konferenz-Beitrag},
   month       = {Januar},
   year        = {2020},
   isbn        = {978-0-9981331-3-3},
   keywords    = {data quality; domain-specific data analysis; text analysis; text similarity; training data},
   language    = {Englisch},
   cr-category = {I.2.7 Natural Language Processing},
   ee          = {https://scholarspace.manoa.hawaii.edu/bitstream/10125/63868/0103.pdf},
   department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract    = {Data analysis pipelines consist of a sequence of various analysis tools. Most of these tools are based on supervised machine learning techniques and thus rely on labeled training data. Selecting appropriate training data has a crucial impact on analytics quality. Yet, most of the times, domain experts who construct analysis pipelines neglect the task of selecting appropriate training data. They rely on default training data sets, e.g., since they do not know which other training data sets exist and what they are used for. Yet, default training data sets may be very different from the domain-specific input data that is to be analyzed, leading to low-quality results. Moreover, these input data sets are usually unlabeled. Thus, information on analytics quality is not measurable with evaluation metrics. Our contribution comprises a method that (1) indicates the expected quality to the domain expert while constructing the analysis pipeline, without need for labels and (2) automatically selects the best-fitting training data. It is based on a measurement of the similarity between input and training data. In our evaluation, we consider the part-of-speech tagger tool and show that Latent Semantic Analysis (LSA) and Cosine Similarity are suited as indicators for the quality of analysis results and as basis for an automatic selection of the best-fitting training data.},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-06&engl=0}
}
@inproceedings {INPROC-2020-04,
   author = {Christoph Stach and Cl{\'e}mentine Gritti and Bernhard Mitschang},
   title = {{Bringing Privacy Control back to Citizens: DISPEL - A Distributed Privacy Management Platform for the Internet of Things}},
   booktitle = {Proceedings of the 35th ACM/SIGAPP Symposium on Applied Computing (PDP)},
   address = {Brno, Czech Republic},
   publisher = {ACM Press},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--8},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2020},
   keywords = {privacy; IoT; authorization concept; attribute-based access control},
   language = {Englisch},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     D.4.6 Operating Systems Security and Protection},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The Internet of Things (IoT) is becoming increasingly popular. It enables a variety of novel applications. Such applications require a lot of data about their users. To this end, sensors continuously monitor various aspects of daily life. Despite the indisputable benefits of IoT applications, this is a severe privacy threat. Due to the GDPR coming into force, there is a need for action on the part of IoT vendors. In this paper, we therefore introduce a Privacy by Design approach for IoT applications called DISPEL. It provides a configuration method enabling users to specify globally, which application may access what data for which purpose. Privacy protection is then applied at the earliest stage possible, i.e., directly on the IoT devices generating the data. Data transmission is protected against unauthorized access and manipulation. Evaluation results show that DISPEL fulfills the requirements towards an IoT privacy system.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-04&engl=0}
}
@inproceedings{INPROC-2020-03,
   author      = {Christoph Stach and Corinna Giebler and Manuela Wagner and Christian Weber and Bernhard Mitschang},
   title       = {{AMNESIA: A Technical Solution towards GDPR-compliant Machine Learning}},
   booktitle   = {Proceedings of the 6th International Conference on Information Systems Security and Privacy (ICISSP 2020)},
   editor      = {Steven Furnell and Paolo Mori and Edgar Weippl and Olivier Camp},
   address     = {Valletta, Malta},
   publisher   = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages       = {1--12},
   type        = {Konferenz-Beitrag},
   month       = {Februar},
   year        = {2020},
   keywords    = {Machine Learning; Data Protection; Privacy Zones; Access Control; Model Management; Provenance; GDPR},
   language    = {Englisch},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     I.5.1 Pattern Recognition Models},
   contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract    = {Machine Learning (ML) applications are becoming increasingly valuable due to the rise of IoT technologies. That is, sensors continuously gather data from different domains and make them available to ML for learning its models. This provides profound insights into the data and enables predictions about future trends. While ML has many advantages, it also represents an immense privacy risk. Data protection regulations such as the GDPR address such privacy concerns, but practical solutions for the technical enforcement of these laws are also required. Therefore, we introduce AMNESIA, a privacy-aware machine learning model provisioning platform. AMNESIA is a holistic approach covering all stages from data acquisition to model provisioning. This enables to control which application may use which data for ML as well as to make models ``forget'' certain knowledge.},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-03&engl=0}
}
@comment{ NOTE(review): ART-2020-20 appears to be a duplicate record of ART-2020-04 (same authors, same article in Datenbank Spektrum, vol. 20, pp. 57--69, Jan. 2020); ART-2020-04 is the more complete record (it carries doi and number) -- consider removing this duplicate. }
@article {ART-2020-20,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Lakes auf den Grund gegangen - Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
   journal = {Datenbank Spektrum},
   publisher = {Springer},
   volume = {20},
   pages = {57--69},
   type = {Artikel in Zeitschrift},
   month = {Januar},
   year = {2020},
   keywords = {Data Lakes; Analytics; Stand der Technik; Herausforderungen; Praxisbeispiel},
   language = {Deutsch},
   cr-category = {H.4 Information Systems Applications},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte eines global agierenden Industrieunternehmens genutzt, um bestehende Herausforderungen zu identifizieren und Anforderungen an Data Lakes herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept, 4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche Realisierungsstrategie.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-20&engl=0}
}
@article {ART-2020-19,
   author = {Christoph Stach and Julia Br{\"a}cker and Rebecca Eichler and Corinna Giebler and Cl{\'e}mentine Gritti},
   title = {{How to Provide High-Utility Time Series Data in a Privacy-Aware Manner: A VAULT to Manage Time Series Data}},
   journal = {International Journal On Advances in Security},
   editor = {Hans-Joachim Hof and Birgit Gersbeck-Schierholz},
   publisher = {IARIA},
   volume = {13},
   number = {3\&4},
   pages = {1--21},
   type = {Artikel in Zeitschrift},
   month = {Dezember},
   year = {2020},
   issn = {1942-2636},
   keywords = {Time Series Data; Privacy Filters; Aggregation; Interpolation; Smoothing; Information Emphasis; Noise; Data Quality; Authentication; Permission Model; Data Management},
   language = {Englisch},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     D.4.6 Operating Systems Security and Protection},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Smart Services enrich many aspects of our daily lives, such as in the Ambient Assisted Living (AAL) domain, where the well-being of patients is automatically monitored, and patients have more autonomy as a result. A key enabler for such services is the Internet of Things (IoT). Using IoT-enabled devices, large amounts of (partly private) data are continuously captured, which can be then gathered and analyzed by Smart Services. Although these services bring many conveniences, they therefore also pose a serious threat to privacy. In order to provide the highest quality of service, they need access to as many data as possible and even reveal more private information due to in-depth data analyses. To ensure privacy, however, data minimization is required. Users are thus forced to balance between service quality and privacy. Current IoT privacy approaches do not reflect this discrepancy properly. Furthermore, as users are often not experienced in the proper handling of privacy mechanisms, this leads to an overly restrictive behavior. Instead of charging users with privacy control, we introduce VAULT, a novel approach towards a privacy-aware management of sensitive data. Since in the IoT time series data have a special position, VAULT is particularly tailored to this kind of data. It attempts to achieve the best possible tradeoff between service quality and privacy for each user. To this end, VAULT manages the data and enables a demand-based and privacy-aware provision of the data, by applying appropriate privacy filters which fulfill not only the quality requirements of the Smart Services but also the privacy requirements of users. In doing so, VAULT pursues a Privacy by Design approach.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-19&engl=0}
}
@comment{ NOTE(review): ART-2020-11 appears to be a duplicate record of ART-2020-04 (same authors, same article in Datenbank Spektrum, vol. 20, pp. 57--69, Jan. 2020); the keyword "Industryerfahrung" also looks garbled (presumably "Industrieerfahrung" -- TODO confirm). Consider removing this duplicate in favor of ART-2020-04, which carries doi and number. }
@article {ART-2020-11,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Lakes auf den Grund gegangen - Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
   journal = {Datenbank Spektrum},
   publisher = {Springer-Verlag},
   volume = {20},
   pages = {57--69},
   type = {Artikel in Zeitschrift},
   month = {Januar},
   year = {2020},
   keywords = {Data Lakes; Industryerfahrung},
   language = {Deutsch},
   cr-category = {H.2.1 Database Management Logical Design},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte eines global agierenden Industrieunternehmens genutzt, um bestehende Herausforderungen zu identifizieren und Anforderungen an Data Lakes herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept, 4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche Realisierungsstrategie.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-11&engl=0}
}
@comment{ NOTE(review): ART-2020-10 appears to be a duplicate record of ART-2020-04 (same authors, same article in Datenbank Spektrum, vol. 20, pp. 57--69, Jan. 2020) and is byte-identical to ART-2020-11 except for its key; consider removing this duplicate in favor of ART-2020-04, which carries doi and number. }
@article {ART-2020-10,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Lakes auf den Grund gegangen - Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
   journal = {Datenbank Spektrum},
   publisher = {Springer-Verlag},
   volume = {20},
   pages = {57--69},
   type = {Artikel in Zeitschrift},
   month = {Januar},
   year = {2020},
   keywords = {Data Lakes; Industryerfahrung},
   language = {Deutsch},
   cr-category = {H.2.1 Database Management Logical Design},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte eines global agierenden Industrieunternehmens genutzt, um bestehende Herausforderungen zu identifizieren und Anforderungen an Data Lakes herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept, 4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche Realisierungsstrategie.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-10&engl=0}
}
@article{ART-2020-04,
   author      = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title       = {{Data Lakes auf den Grund gegangen: Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
   journal     = {Datenbank-Spektrum},
   publisher   = {Springer},
   volume      = {20},
   number      = {1},
   pages       = {57--69},
   type        = {Artikel in Zeitschrift},
   month       = {Januar},
   year        = {2020},
   doi         = {10.1007/s13222-020-00332-0},
   keywords    = {Data Lake; Analytics; Stand der Technik; Herausforderungen; Praxisbeispiel},
   language    = {Deutsch},
   cr-category = {A.1 General Literature, Introductory and Survey,     E.0 Data General},
   ee          = {https://rdcu.be/b0WM8},
   contact     = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract    = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte eines global agierenden Industrieunternehmens genutzt, um bestehende Herausforderungen zu identifizieren und Anforderungen an Data Lakes herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept, 4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche Realisierungsstrategie.},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-04&engl=0}
}
 
Zum Seitenanfang