Publikationen AS: Bibliographie 2020 BibTeX
@inproceedings {INPROC-2020-57,
author = {Simon Dreher and Peter Reimann and Christoph Gr{\"o}ger},
title = {{Application Fields and Research Gaps of Process Mining in Manufacturing Companies}},
booktitle = {Proceedings of INFORMATIK 2020},
editor = {R. H. Reussner and A. Koziolek and R. Heinrich},
publisher = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
pages = {621--634},
type = {Konferenz-Beitrag},
month = {Oktober},
year = {2020},
keywords = {Process Mining; Application; Production; Manufacturing; SCOR; Literature Review},
language = {Englisch},
cr-category = {H.2.8 Database Applications},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {To survive in global competition with increasing cost pressure, manufacturing
companies must continuously optimize their manufacturing-related processes.
Thereby, process mining constitutes an important data-driven approach to gain a
profound understanding of the actual processes and to identify optimization
potentials by applying data mining and machine learning techniques on event
data. However, there is little knowledge about the feasibility and usefulness
of process mining specifically in manufacturing companies. Hence, this paper
provides an overview of potential applications of process mining for the
analysis of manufacturing-related processes. We conduct a systematic literature
review, classify relevant articles according to the
Supply-Chain-Operations-Reference-Model (SCOR-model), identify research gaps,
such as domain-specific challenges regarding unstructured, cascaded and
non-linear processes or heterogeneous data sources, and give practitioners
inspiration which manufacturing-related processes can be analyzed by process
mining techniques.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-57&engl=0}
}
@inproceedings {INPROC-2020-56,
author = {Christian Weber and Peter Reimann},
title = {{MMP - A Platform to Manage Machine Learning Models in Industry 4.0 Environments}},
booktitle = {Proceedings of the IEEE 24th International Enterprise Distributed Object Computing Workshop (EDOCW)},
address = {Eindhoven, The Netherlands},
publisher = {IEEE},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
type = {Demonstration},
month = {Juli},
year = {2020},
keywords = {Model Management; Machine Learning; Collaborative Data Science},
language = {Englisch},
cr-category = {H.3.4 Information Storage and Retrieval Systems and Software},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {In manufacturing environments, machine learning models are being built for
several use cases, such as predictive maintenance and product quality control.
In this context, the various manufacturing processes, machines, and product
variants make it necessary to create and use lots of different machine learning
models. This calls for a software system that is able to manage all these
diverse machine learning models and associated metadata. However, current model
management systems do not associate models with business and domain context to
provide non-expert users with tailored functions for model search and
discovery. Moreover, none of the existing systems provides a comprehensive
overview of all models within an organization. In our demonstration, we present
the MMP, our model management platform that addresses these issues. The MMP
provides a model metadata extractor, a model registry, and a context manager to
store model metadata in a central metadata store. On top of this, the MMP
provides frontend components that offer the above-mentioned functionalities. In
our demonstration, we show two scenarios for model management in Industry 4.0
environments that illustrate the novel functionalities of the MMP. We
demonstrate to the audience how the platform and its metadata, linking models
to their business and domain context, help non-expert users to search and
discover models. Furthermore, we show how to use MMP's powerful visualizations
for model reporting, such as a dashboard and a model landscape view.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-56&engl=0}
}
@inproceedings {INPROC-2020-55,
author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
title = {{A Zone Reference Model for Enterprise-Grade Data Lake Management}},
booktitle = {Proceedings of the 24th IEEE Enterprise Computing Conference},
publisher = {IEEE},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
pages = {57--66},
type = {Konferenz-Beitrag},
month = {Oktober},
year = {2020},
keywords = {Data Lake; Zones; Reference Model; Industry Case; Industry Experience},
language = {Englisch},
cr-category = {H.4 Information Systems Applications},
contact = {Senden Sie eine E-Mail an corinna.giebler@ipvs.uni-stuttgart.de},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Data lakes are on the rise as data platforms for any kind of analytics, from
data exploration to machine learning. They achieve the required flexibility by
storing heterogeneous data in their raw format, and by avoiding the need for
pre-defined use cases. However, storing only raw data is inefficient, as for
many applications, the same data processing has to be applied repeatedly. To
foster the reuse of processing steps, literature proposes to store data in
different degrees of processing in addition to their raw format. To this end,
data lakes are typically structured in zones. There exists various zone models,
but they are varied, vague, and no assessments are given. It is unclear which
of these zone models is applicable in a practical data lake implementation in
enterprises. In this work, we assess existing zone models using requirements
derived from multiple representative data analytics use cases of a real-world
industry case. We identify the shortcomings of existing work and develop a zone
reference model for enterprise-grade data lake management in a detailed manner.
We assess the reference model's applicability through a prototypical
implementation for a real-world enterprise data lake use case. This assessment
shows that the zone reference model meets the requirements relevant in practice
and is ready for industry use.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-55&engl=0}
}
@inproceedings {INPROC-2020-54,
author = {Manuel Fritz and Dennis Tschechlov and Holger Schwarz},
title = {{Learning from Past Observations: Meta-Learning for Efficient Clustering Analyses}},
booktitle = {Proceedings of 22nd Big Data Analytics and Knowledge Discovery (DaWaK), 2020},
editor = {Min Song and Il-Yeol Song and Gabriele Kotsis and A Min Tjoa and Ismail Khalil},
publisher = {Springer, Cham},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
series = {Lecture Notes in Computer Science},
volume = {12393},
pages = {364--379},
type = {Konferenz-Beitrag},
month = {September},
year = {2020},
isbn = {978-3-030-59065-9},
doi = {10.1007/978-3-030-59065-9_28},
language = {Englisch},
cr-category = {H.3.3 Information Search and Retrieval},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Many clustering algorithms require the number of clusters as input parameter
prior to execution. Since the ``best'' number of clusters is most often
unknown in advance, analysts typically execute clustering algorithms multiple
times with varying parameters and subsequently choose the most promising
result. Several methods for an automated estimation of suitable parameters have
been proposed. Similar to the procedure of an analyst, these estimation methods
draw on repetitive executions of a clustering algorithm with varying
parameters. However, when working with voluminous datasets, each single
execution tends to be very time-consuming. Especially in today's Big Data
era, such a repetitive execution of a clustering algorithm is not feasible for
an efficient exploration. We propose a novel and efficient approach to
accelerate estimations for the number of clusters in datasets. Our approach
relies on the idea of meta-learning and terminates each execution of the
clustering algorithm as soon as an expected qualitative demand is met. We show
that this new approach is generally applicable, i.e., it can be used with
existing estimation methods. Our comprehensive evaluation reveals that our
approach is able to speed up the estimation of the number of clusters by an
order of magnitude, while still achieving accurate estimates.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-54&engl=0}
}
@inproceedings {INPROC-2020-53,
author = {Manuel Fritz and Michael Behringer and Holger Schwarz},
title = {{LOG-Means: Efficiently Estimating the Number of Clusters in Large Datasets}},
booktitle = {Proceedings of the 46th International Conference on Very Large Databases (VLDB)},
editor = {Magdalena Balazinska and Xiaofang Zhou},
publisher = {ACM Digital Library},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
series = {Proceedings of the VLDB Endowment},
volume = {13 (12)},
pages = {2118--2131},
type = {Konferenz-Beitrag},
month = {August},
year = {2020},
issn = {2150-8097},
doi = {10.14778/3407790.3407813},
language = {Englisch},
cr-category = {H.2.8 Database Applications},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Clustering is a fundamental primitive in manifold applications. In order to
achieve valuable results, parameters of the clustering algorithm, e.g., the
number of clusters, have to be set appropriately, which is a tremendous
pitfall. To this end, analysts rely on their domain knowledge in order to
define parameter search spaces. While experienced analysts may be able to
define a small search space, especially novice analysts often define rather
large search spaces due to the lack of in-depth domain knowledge. These search
spaces can be explored in different ways by estimation methods for the number
of clusters. In the worst case, estimation methods perform an exhaustive search
in the given search space, which leads to infeasible runtimes for large
datasets and large search spaces. We propose LOG-Means, which is able to
overcome these issues of existing methods. We show that LOG-Means provides
estimates in sublinear time regarding the defined search space, thus being a
strong fit for large datasets and large search spaces. In our comprehensive
evaluation on an Apache Spark cluster, we compare LOG-Means to 13 existing
estimation methods. The evaluation shows that LOG-Means significantly
outperforms these methods in terms of runtime and accuracy. To the best of our
knowledge, this is the most systematic comparison on large datasets and search
spaces as of today.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-53&engl=0}
}
@inproceedings {INPROC-2020-52,
author = {Daniel Del Gaudio and Maximilian Reichel and Pascal Hirmer},
title = {{A Life Cycle Method for Device Management in Dynamic IoT Environments}},
booktitle = {Proceedings of the 5th International Conference on Internet of Things, Big Data and Security - Volume 1: IoTBDS},
publisher = {SciTePress Digital Library},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
pages = {46--56},
type = {Konferenz-Beitrag},
month = {Mai},
year = {2020},
keywords = {Internet of Things, Discovery, Device Integration, Decentralization},
language = {Englisch},
cr-category = {C.2.4 Distributed Systems},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {In the Internet of Things, interconnected devices communicate with each other
through standardized internet protocols to reach common goals. By doing so,
they enable building complex, self-organizing applications, such as Smart
Cities, or Smart Factories. Especially in large IoT environments, newly
appearing devices as well as leaving or failing IoT devices are a great
challenge. New devices need to be integrated into the application whereas
failing devices need to be dealt with. In a Smart City, newly appearing actors,
for example, smart phones or connected cars, appear and disappear all the time.
Dealing with this dynamic is a great issue, especially when done automatically.
Consequently, in this paper, we introduce A Life Cycle Method for Device
Management in Dynamic IoT Environments. This method enables integrating newly
appearing IoT devices into IoT applications and, furthermore, offers means to
cope with failing devices. Our approach is evaluated through a system
architecture and a corresponding prototypical implementation.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-52&engl=0}
}
@inproceedings {INPROC-2020-51,
author = {Daniel Del Gaudio and Pascal Hirmer},
title = {{Fulfilling the IoT Vision: Are We There Yet?}},
booktitle = {Proceedings of the 5th International Conference on Internet of Things, Big Data and Security - Volume 1: IoTBDS},
publisher = {SciTePress Digital Library},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
pages = {367--374},
type = {Konferenz-Beitrag},
month = {Mai},
year = {2020},
isbn = {978-989-758-426-8},
keywords = {Internet of Things, Decentralized, Autonomous, Dynamic, Smart},
language = {Englisch},
cr-category = {C.2 Computer-Communication Networks},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {The vision of the Internet of Things is enabling self-controlled and
decentralized environments, in which hardware devices, equipped with sensors
and actuators communicate with each other through standardized internet
protocols to reach common goals. The device-to-device communication should be
decentralized and should not necessarily require human interaction. However,
enabling such complex IoT applications, e.g., connected cars, is a big
challenge, since many requirements need to be fulfilled. These requirements
include, for example, security, privacy, timely data processing, uniform
communication standards, or location-awareness. Based on an intensive
literature review, in this overview paper, we define requirements for such
environments and, in addition, we discuss whether they are fulfilled by
state-of-the-art approaches or whether there still has to be work done in the
future. We conclude this paper by illustrating research gaps that have to be
filled in order to realize the IoT vision.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-51&engl=0}
}
@inproceedings {INPROC-2020-50,
author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
title = {{HANDLE - A Generic Metadata Model for Data Lakes}},
booktitle = {Big Data Analytics and Knowledge Discovery: 22nd International Conference, DaWaK 2020, Bratislava, Slovakia, September 14--17, 2020, Proceedings},
publisher = {Springer, Cham},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
pages = {73--88},
type = {Konferenz-Beitrag},
month = {September},
year = {2020},
language = {Englisch},
cr-category = {H.2 Database Management},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {The substantial increase in generated data induced the development of new
concepts such as the data lake. A data lake is a large storage repository
designed to enable flexible extraction of the data's value. A key aspect of
exploiting data value in data lakes is the collection and management of
metadata. To store and handle the metadata, a generic metadata model is
required that can reflect metadata of any potential metadata management use
case, e.g., data versioning or data lineage. However, an evaluation of existent
metadata models yields that none so far are sufficiently generic. In this work,
we present HANDLE, a generic metadata model for data lakes, which supports the
flexible integration of metadata, data lake zones, metadata on various granular
levels, and any metadata categorization. With these capabilities HANDLE enables
comprehensive metadata management in data lakes. We show HANDLE's feasibility
through the application to an exemplary access-use-case and a prototypical
implementation. A comparison with existent models yields that HANDLE can
reflect the same information and provides additional capabilities needed for
metadata management in data lakes.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-50&engl=0}
}
@inproceedings {INPROC-2020-48,
author = {Dennis Przytarski and Christoph Stach and Cl{\'e}mentine Gritti and Bernhard Mitschang},
title = {{A Blueprint for a Trustworthy Health Data Platform Encompassing IoT and Blockchain Technologies}},
booktitle = {Proceedings of the ISCA 29th International Conference on Software Engineering and Data Engineering (Las Vegas, October 2020)},
publisher = {ISCA in Cooperation with IEEE Computer Society},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Germany},
pages = {1--10},
type = {Konferenz-Beitrag},
month = {Oktober},
year = {2020},
language = {Englisch},
cr-category = {H.2.7 Database Administration,
K.6.5 Security and Protection},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte H{\"o}chstleistungsrechner, Anwendersoftware},
abstract = {eHealth provides great relief for patients and physicians. This means, patients
autonomously monitor their condition via IoT medical devices and make these
data available to physicians for analyses. This requires a data platform that
takes care of data acquisition, management, and provisioning. As health data
are highly sensitive, there are major concerns regarding data security with
respect to confidentiality, integrity, and authenticity. To this end, we
present a blueprint for constructing a trustworthy health data platform called
SEAL. It provides a lightweight attribute-based authentication mechanism for
IoT devices to validate all involved data sources, there is a fine-grained data
provisioning system to enable data provision according to actual requirements,
and a verification procedure ensures that data cannot be manipulated.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-48&engl=0}
}
@inproceedings {INPROC-2020-45,
author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
title = {{HANDLE - A Generic Metadata Model for Data Lakes}},
booktitle = {Big Data Analytics and Knowledge Discovery},
editor = {Min Song and Il-Yeol Song and Gabriele Kotsis and A Min Tjoa and Ismail Khalil},
publisher = {Springer Nature Switzerland AG},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
series = {Lecture Notes in Computer Science},
volume = {12393},
pages = {73--88},
type = {Konferenz-Beitrag},
month = {September},
year = {2020},
doi = {10.1007/978-3-030-59065-9_7},
keywords = {Metadata management; Metadata model; Data lake},
language = {Englisch},
cr-category = {H.2 Database Management},
ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2020-45/INPROC-2020-45.pdf},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {The substantial increase in generated data induced the development of new
concepts such as the data lake. A data lake is a large storage repository
designed to enable flexible extraction of the data's value. A key aspect of
exploiting data value in data lakes is the collection and management of
metadata. To store and handle the metadata, a generic metadata model is
required that can reflect metadata of any potential metadata management use
case, e.g., data versioning or data lineage. However, an evaluation of existent
metadata models yields that none so far are sufficiently generic. In this work,
we present HANDLE, a generic metadata model for data lakes, which supports the
flexible integration of metadata, data lake zones, metadata on various granular
levels, and any metadata categorization. With these capabilities HANDLE enables
comprehensive metadata management in data lakes. We show HANDLE's feasibility
through the application to an exemplary access-use-case and a prototypical
implementation. A comparison with existent models yields that HANDLE can
reflect the same information and provides additional capabilities needed for
metadata management in data lakes.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-45&engl=0}
}
@inproceedings {INPROC-2020-39,
author = {Ana Cristina Franco da Silva and Pascal Hirmer and Jan Schneider and Seda Ulusal and Matheus Tavares Frigo},
title = {{MBP: Not just an IoT Platform}},
booktitle = {2020 IEEE International Conference on Pervasive Computing and Communications Workshops (PerCom Workshops)},
publisher = {Institute of Electrical and Electronics Engineers, Inc. (IEEE)},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
pages = {1--3},
type = {Demonstration},
month = {August},
year = {2020},
isbn = {978-1-7281-4716-1},
doi = {10.1109/PerComWorkshops48775.2020.9156156},
keywords = {Internet of Things; Sensor Integration; IoT environments; Complex Event Processing},
language = {Englisch},
cr-category = {H.0 Information Systems General},
ee = {https://ieeexplore.ieee.org/document/9156156},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {In this demonstration paper, we introduce the Multi-purpose Binding and
Provisioning Platform (MBP), an open-source IoT platform developed for easy
binding, provisioning, and management of IoT environments. Furthermore, the MBP
enables the simple realization of IoT applications, such as heating,
ventilation, air conditioning (HVAC) systems, by allowing users to create rules
for the IoT environment, in a straightforward and event-condition-action
fashion. The efficient and timely data processing of IoT environments are
assured through underlying complex event processing technologies.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-39&engl=0}
}
@inproceedings {INPROC-2020-38,
author = {Alejandro Villanueva Zacarias and Rachaa Ghabri and Peter Reimann},
title = {{AD4ML: Axiomatic Design to Specify Machine Learning Solutions for Manufacturing}},
booktitle = {Proceedings of the 21st International Conference on Information Reuse and Integration for Data Science},
publisher = {IEEE},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
type = {Konferenz-Beitrag},
month = {August},
year = {2020},
language = {Englisch},
cr-category = {H.2.8 Database Applications},
keywords = {manufacturing; machine-learning; design},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Machine learning is increasingly adopted in manufacturing use cases, e.g., for
fault detection in a production line. Each new use case requires developing its
own machine learning (ML) solution. A ML solution integrates different software
components to read, process, and analyze all use case data, as well as to
finally generate the output that domain experts need for their decision-making.
The process to design a system specification for a ML solution is not
straight-forward. It entails two types of complexity: (1) The technical
complexity of selecting combinations of ML algorithms and software components
that suit a use case; (2) the organizational complexity of integrating
different requirements from a multidisciplinary team of, e.g., domain experts,
data scientists, and IT specialists. In this paper, we propose several
adaptations to Axiomatic Design in order to design ML solution specifications
that handle these complexities. We call this Axiomatic Design for Machine
Learning (AD4ML). We apply AD4ML to specify a ML solution for a fault detection
use case and discuss to what extent our approach conquers the above-mentioned
complexities. We also discuss how AD4ML facilitates the agile design of ML
solutions.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-38&engl=0}
}
@inproceedings {INPROC-2020-37,
author = {Mathias Mormul and Pascal Hirmer and Christoph Stach and Bernhard Mitschang},
title = {{DEAR: Distributed Evaluation of Alerting Rules}},
booktitle = {IEEE 13th International Conference on Cloud Computing (CLOUD)},
publisher = {IEEE},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
pages = {1--1},
type = {Konferenz-Beitrag},
month = {Dezember},
year = {2020},
keywords = {cloud monitoring; agent-based; alerting},
language = {Englisch},
cr-category = {H.0 Information Systems General},
contact = {mathias.mormul@ipvs.uni-stuttgart.de},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Cloud computing passed the hype cycle long ago and firmly established itself as
a future technology since then. However, to utilize the cloud as
cost-efficiently as possible, a continuous monitoring is key to prevent an
over- or undercommissioning of resources. In large-scaled scenarios, several
challenges for cloud monitoring, such as high network traffic volume, low
accuracy of monitoring data, and high time-toinsight, require new approaches in
IT Operations while considering administrative complexity. To handle these
challenges, we present DEAR, the Distributed Evaluation of Alerting Rules. DEAR
is a plugin for monitoring systems which automatically distributes alerting
rules to the monitored resources to solve the trade-off between high accuracy
and low network traffic volume without administrative overhead. We evaluate our
approach against requirements of today's IT monitoring and compare it to
conventional agent-based monitoring approaches.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-37&engl=0}
}
@inproceedings {INPROC-2020-32,
author = {Vitali Hirsch and Peter Reimann and Bernhard Mitschang},
title = {{Exploiting Domain Knowledge to Address Multi-Class Imbalance and a Heterogeneous Feature Space in Classification Tasks for Manufacturing Data}},
booktitle = {Proceedings of the 46th International Conference on Very Large Databases (VLDB)},
editor = {Magdalena Balazinska and Xiaofang Zhou},
publisher = {ACM Digital Library},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
series = {Proceedings of the VLDB Endowment},
volume = {13(12)},
type = {Konferenz-Beitrag},
month = {August},
year = {2020},
language = {Englisch},
cr-category = {H.2.8 Database Applications},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Classification techniques are increasingly adopted for quality control in
manufacturing, e. g., to help domain experts identify the cause of quality
issues of defective products. However, real-world data often imply a set of
analytical challenges, which lead to a reduced classification performance.
Major challenges are a high degree of multi-class imbalance within data and a
heterogeneous feature space that arises from the variety of underlying
products. This paper considers such a challenging use case in the area of
End-of-Line testing, i. e., the final functional test of complex products.
Existing solutions to classification or data pre-processing only address
individual analytical challenges in isolation. We propose a novel
classification system that explicitly addresses both challenges of multi-class
imbalance and a heterogeneous feature space together. As main contribution,
this system exploits domain knowledge to systematically prepare the training
data. Based on an experimental evaluation on real-world data, we show that our
classification system outperforms any other classification technique in terms
of accuracy. Furthermore, we can reduce the amount of rework required to solve
a quality issue of a product.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-32&engl=0}
}
@inproceedings {INPROC-2020-31,
author = {Yannick Wilhelm and Peter Reimann and Wolfgang Gauchel and Bernhard Mitschang},
title = {{Overview on Hybrid Approaches to Fault Detection and Diagnosis: Combining Data-driven, Physics-based and Knowledge-based Models}},
booktitle = {Procedia CIRP: Proceedings of the 14th CIRP Conference on Intelligent Computation in Manufacturing Engineering (CIRP ICME)},
publisher = {Elsevier BV},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
type = {Konferenz-Beitrag},
month = {Juli},
year = {2020},
keywords = {Fault Detection; Fault Diagnosis; Hybrid Methods; Diagnostics and Maintenance; Knowledge-driven Methods; Machine Learning},
language = {Englisch},
cr-category = {H.2.8 Database Applications,
I.2.1 Applications and Expert Systems},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {In this paper, we review hybrid approaches for fault detection and fault
diagnosis (FDD) that combine data-driven analysis with physics-based and
knowledge-based models to overcome a lack of data and to increase the FDD
accuracy. We categorize these hybrid approaches according to the steps of an
extended common workflow for FDD. This gives practitioners indications of which
kind of hybrid FDD approach they can use in their application.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-31&engl=0}
}
@inproceedings {INPROC-2020-20,
author = {Yannick Wilhelm and Ulf Schreier and Peter Reimann and Bernhard Mitschang and Holger Ziekow},
title = {{Data Science Approaches to Quality Control in Manufacturing: A Review of Problems, Challenges and Architecture}},
booktitle = {Springer Proceedings Series Communications in Computer and Information Science (CCIS)},
publisher = {Springer},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
type = {Konferenz-Beitrag},
month = {Oktober},
year = {2020},
keywords = {Data Science; Machine Learning; Quality Control; Challenges; Functional Architecture},
language = {Englisch},
cr-category = {H.2.8 Database Applications},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Manufacturing environments are characterized by non-stationary processes,
constantly varying conditions, complex process interdependencies, and a high
number of product variants. These and other aspects pose several challenges for
common machine learning algorithms to achieve reliable and accurate
predictions. This overview and vision paper provides a comprehensive list of
common problems and challenges for data science approaches to quality control
in manufacturing. We have derived these problems and challenges by inspecting
three real-world use cases in the field of product quality control and via a
comprehensive literature study. We furthermore associate the identified problems
and challenges to individual layers and components of a functional setup, as it
can be found in manufacturing environments today. Additionally, we extend and
revise this functional setup and this way propose our vision of a future
functional software architecture. This functional architecture represents a
visionary blueprint for solutions that are able to address all challenges for
data science approaches in manufacturing quality control.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-20&engl=0}
}
@inproceedings {INPROC-2020-19,
author = {Christian Weber and Pascal Hirmer and Peter Reimann},
title = {{A Model Management Platform for Industry 4.0 - Enabling Management of Machine Learning Models in Manufacturing Environments}},
booktitle = {Proceedings of the 23rd International Conference on Business Information Systems (BIS)},
editor = {Witold Abramowicz and Rainer Alt and Gary Klein and Adrian Paschke and Kurt Sandkuhl},
publisher = {Springer International Publishing},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
series = {Lecture Notes in Business Information Processing},
type = {Konferenz-Beitrag},
month = {November},
year = {2020},
issn = {1865-1348},
keywords = {Model Management; Machine Learning; Metadata Tracking},
language = {Englisch},
cr-category = {H.3.4 Information Storage and Retrieval Systems and Software},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Industry 4.0 use cases such as predictive maintenance and product quality
control make it necessary to create, use and maintain a multitude of different
machine learning models. In this setting, model management systems help to
organize models. However, concepts for model management systems currently focus
on data scientists, but do not support non-expert users such as domain experts
and business analysts. Thus, it is difficult for them to reuse existing models
for their use cases. In this paper, we address these challenges and present an
architecture, a metadata schema and a corresponding model management platform.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-19&engl=0}
}
@inproceedings {INPROC-2020-18,
author = {Julian Ziegler and Peter Reimann and Florian Keller and Bernhard Mitschang},
title = {{A Graph-based Approach to Manage CAE Data in a Data Lake}},
booktitle = {Procedia CIRP: Proceedings of the 53rd CIRP Conference on Manufacturing Systems (CIRP CMS 2020)},
publisher = {Elsevier},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
type = {Konferenz-Beitrag},
month = {Juli},
year = {2020},
language = {Englisch},
cr-category = {H.2.8 Database Applications},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Computer-aided engineering (CAE) applications generate vast quantities of
heterogeneous data. Domain experts often fail to explore and analyze these
data, because they are not integrated across different applications. Existing
data management solutions are rather tailored to scientific applications. In
our approach, we tackle this issue by combining a data lake solution with
graph-based metadata management. This provides a holistic view of all CAE data
and of the data-generating applications in one interconnected structure. Based
on a prototypical implementation, we discuss how this eases the task of domain
experts to explore and extract data for further analyses.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-18&engl=0}
}
@inproceedings {INPROC-2020-17,
author = {Vitali Hirsch and Peter Reimann and Bernhard Mitschang},
title = {{Incorporating Economic Aspects into Recommendation Ranking to Reduce Failure Costs}},
booktitle = {Procedia CIRP: Proceedings of the 53rd CIRP Conference on Manufacturing Systems (CIRP CMS 2020)},
publisher = {Elsevier},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
type = {Konferenz-Beitrag},
month = {Juli},
year = {2020},
keywords = {decision support; predictive analytics; quality control; End-of-Line testing; classification; fault isolation; failure costs},
language = {Englisch},
cr-category = {H.2.8 Database Applications},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Machine learning approaches for manufacturing usually offer recommendation
lists, e.g., to support humans in fault diagnosis. For instance, if a product
does not pass the final check after the assembly, a recommendation list may
contain likely faulty product components to be replaced. Thereby, the list
ranks these components using their probabilities. However, these probabilities
often differ marginally, while economic impacts, e.g., the costs for replacing
components, differ significantly. We address this issue by proposing an approach
that incorporates costs to re-rank a list. Our evaluation shows that this
approach reduces fault-related costs when using recommendation lists to support
human labor.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-17&engl=0}
}
@inproceedings {INPROC-2020-11,
author = {Mathias Mormul and Pascal Hirmer and Christoph Stach and Bernhard Mitschang},
title = {{Avoiding Vendor-Lockin in Cloud Monitoring using Generic Agent Templates}},
booktitle = {Proceedings of the 23rd International Conference on Business Information Systems (BIS), 2020},
publisher = {Springer},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
pages = {1--1},
type = {Konferenz-Beitrag},
month = {Juni},
year = {2020},
keywords = {Vendor Lock-in; Cloud monitoring; Monitoring agents; Genericity},
language = {Englisch},
cr-category = {H.4.0 Information Systems Applications General},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Cloud computing passed the hype cycle long ago and firmly established itself as
a future technology since then. However, to utilize the cloud optimally, and
therefore, as cost-efficiently as possible, a continuous monitoring is key to
prevent an over- or under-commissioning of resources. However, selecting a
suitable monitoring solution is a challenging task. Monitoring agents that
collect monitoring data are spread across the monitored IT environment.
Therefore, the possibility of vendor lock-ins leads to a lack of flexibility
when the cloud environment or the business needs change. To handle these
challenges, we introduce \textit{generic agent templates} that are applicable
to many monitoring systems and support a replacement of monitoring systems.
Solution-specific technical details of monitoring agents are abstracted from
and system administrators only need to model generic agents, which can be
transformed into solution-specific monitoring agents. The transformation logic
required for this process is provided by domain experts to not further burden
system administrators. Furthermore, we introduce an agent lifecycle to support
the system administrator with the management and deployment of generic agents.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-11&engl=0}
}
@inproceedings{INPROC-2020-08,
  author      = {Mathias Mormul and Christoph Stach},
  title       = {{A Context Model for Holistic Monitoring and Management of Complex IT Environments}},
  booktitle   = {Proceedings of the 2020 IEEE International Conference on Pervasive Computing and Communications Workshops (CoMoRea)},
  publisher   = {IEEE Computer Society},
  institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
  pages       = {1--1},
  type        = {Workshop-Beitrag},
  month       = {M{\"a}rz},
  year        = {2020},
  keywords    = {Context Model; IT Operations; AIOps; Monitoring},
  language    = {Englisch},
  cr-category = {C.0 Computer Systems Organization, General},
  contact     = {Senden Sie eine E-Mail an mathias.mormul@ipvs.uni-stuttgart.de},
  department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
  abstract    = {The increased usage of IoT, containerization, and multiple clouds not only
                 changed the way IT works but also the way IT Operations, i. e., the monitoring
                 and management of IT assets, works. Monitoring a complex IT environment leads
                 to massive amounts of heterogeneous context data, usually spread across
                 multiple data silos, which needs to be analyzed and acted upon autonomously.
                 However, for a holistic overview of the IT environment, context data needs to
                 be consolidated which leads to several problems. For scalable and automated
                 processes, it is essential to know what context is required for a given
                 monitored resource, where the context data are originating from, and how to
                 access them across the data silos. Therefore, we introduce the Monitoring
                 Resource Model for the holistic management of context data. We show what
                 context is essential for the management of monitored resources and how it can
                 be used for context reasoning. Furthermore, we propose a multi-layered
                 framework for IT Operations with which we present the benefits of the
                 Monitoring Resource Model.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-08&engl=0}
}
@inproceedings {INPROC-2020-07,
author = {Christoph Stach and Cl{\'e}mentine Gritti and Dennis Przytarski and Bernhard Mitschang},
title = {{Trustworthy, Secure, and Privacy-aware Food Monitoring Enabled by Blockchains and the IoT}},
booktitle = {Proceedings of the 18th Annual IEEE International Conference on Pervasive Computing and Communications Workshops (PerCom Workshops), 23-27 March, 2020, Austin, Texas, USA},
publisher = {IEEE},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
pages = {1--4},
type = {Konferenz-Beitrag},
month = {M{\"a}rz},
year = {2020},
keywords = {Attribute-based Credentials; Blockchain; Data Authentication; IoT; Privacy; Service Utility; Transparency; Trust},
language = {Englisch},
cr-category = {K.6.5 Security and Protection,
D.4.6 Operating Systems Security and Protection},
contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {A large number of food scandals (e.g., falsely declared meat or non-compliance
with hygiene regulations) are causing considerable concern to consumers.
Although Internet of Things (IoT) technologies are used in the food industry to
monitor production (e.g., for tracing the origin of meat or monitoring cold
chains), the gathered data are not used to provide full transparency to the
consumer. To achieve this, however, three aspects must be considered: a) The
origin of the data must be verifiable, i.e., it must be ensured that the data
originate from calibrated sensors. b) The data must be stored tamper-resistant,
immutable, and open to all consumers. c) Despite this openness, the privacy of
affected data subjects (e.g., the carriers) must still be protected. To this
end, we introduce the SHEEPDOG architecture that ``shepherds'' products from
production to purchase to enable a trustworthy, secure, and privacy-aware food
monitoring. In SHEEPDOG, attribute-based credentials ensure trustworthy data
acquisition, blockchain technologies provide secure data storage, and
fine-grained access control enables privacy-aware data provision.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-07&engl=0}
}
@inproceedings{INPROC-2020-06,
  author      = {Cornelia Kiefer and Peter Reimann and Bernhard Mitschang},
  title       = {{Prevent Low-Quality Analytics by Automatic Selection of the Best-Fitting Training Data}},
  booktitle   = {Proceedings of the 53rd Hawaii International Conference on System Sciences (HICSS)},
  address     = {Maui, Hawaii, USA},
  publisher   = {Online},
  institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
  pages       = {1036--1045},
  type        = {Konferenz-Beitrag},
  month       = {Januar},
  year        = {2020},
  isbn        = {978-0-9981331-3-3},
  keywords    = {data quality; domain-specific data analysis; text analysis; text similarity; training data},
  language    = {Englisch},
  cr-category = {I.2.7 Natural Language Processing},
  ee          = {https://scholarspace.manoa.hawaii.edu/bitstream/10125/63868/0103.pdf},
  department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
  abstract    = {Data analysis pipelines consist of a sequence of various analysis tools. Most
                 of these tools are based on supervised machine learning techniques and thus
                 rely on labeled training data. Selecting appropriate training data has a
                 crucial impact on analytics quality. Yet, most of the times, domain experts who
                 construct analysis pipelines neglect the task of selecting appropriate training
                 data. They rely on default training data sets, e.g., since they do not know
                 which other training data sets exist and what they are used for. Yet, default
                 training data sets may be very different from the domain-specific input data
                 that is to be analyzed, leading to low-quality results. Moreover, these input
                 data sets are usually unlabeled. Thus, information on analytics quality is not
                 measurable with evaluation metrics. Our contribution comprises a method that
                 (1) indicates the expected quality to the domain expert while constructing the
                 analysis pipeline, without need for labels and (2) automatically selects the
                 best-fitting training data. It is based on a measurement of the similarity
                 between input and training data. In our evaluation, we consider the
                 part-of-speech tagger tool and show that Latent Semantic Analysis (LSA) and
                 Cosine Similarity are suited as indicators for the quality of analysis results
                 and as basis for an automatic selection of the best-fitting training data.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-06&engl=0}
}
@inproceedings {INPROC-2020-04,
author = {Christoph Stach and Cl{\'e}mentine Gritti and Bernhard Mitschang},
title = {{Bringing Privacy Control back to Citizens: DISPEL - A Distributed Privacy Management Platform for the Internet of Things}},
booktitle = {Proceedings of the 35th ACM/SIGAPP Symposium on Applied Computing (SAC)},
address = {Brno, Czech Republic},
publisher = {ACM Press},
institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
pages = {1--8},
type = {Konferenz-Beitrag},
month = {M{\"a}rz},
year = {2020},
keywords = {privacy; IoT; authorization concept; attribute-based access control},
language = {Englisch},
cr-category = {K.4.1 Computers and Society Public Policy Issues,
D.4.6 Operating Systems Security and Protection},
contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {The Internet of Things (IoT) is becoming increasingly popular. It enables a
variety of novel applications. Such applications require a lot of data about
their users. To this end, sensors continuously monitor various aspects of daily
life. Despite the indisputable benefits of IoT applications, this is a severe
privacy threat. Due to the GDPR coming into force, there is a need for action
on the part of IoT vendors. In this paper, we therefore introduce a Privacy by
Design approach for IoT applications called DISPEL. It provides a configuration
method enabling users to specify globally, which application may access what
data for which purpose. Privacy protection is then applied at the earliest
stage possible, i.e., directly on the IoT devices generating the data. Data
transmission is protected against unauthorized access and manipulation.
Evaluation results show that DISPEL fulfills the requirements towards an IoT
privacy system.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-04&engl=0}
}
@inproceedings{INPROC-2020-03,
  author      = {Christoph Stach and Corinna Giebler and Manuela Wagner and Christian Weber and Bernhard Mitschang},
  title       = {{AMNESIA: A Technical Solution towards GDPR-compliant Machine Learning}},
  booktitle   = {Proceedings of the 6th International Conference on Information Systems Security and Privacy (ICISSP 2020)},
  editor      = {Steven Furnell and Paolo Mori and Edgar Weippl and Olivier Camp},
  address     = {Valletta, Malta},
  publisher   = {SciTePress},
  institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
  pages       = {1--12},
  type        = {Konferenz-Beitrag},
  month       = {Februar},
  year        = {2020},
  keywords    = {Machine Learning; Data Protection; Privacy Zones; Access Control; Model Management; Provenance; GDPR},
  language    = {Englisch},
  cr-category = {K.4.1 Computers and Society Public Policy Issues,
                 I.5.1 Pattern Recognition Models},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
  department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
  abstract    = {Machine Learning (ML) applications are becoming increasingly valuable due to
                 the rise of IoT technologies. That is, sensors continuously gather data from
                 different domains and make them available to ML for learning its models. This
                 provides profound insights into the data and enables predictions about future
                 trends. While ML has many advantages, it also represents an immense privacy
                 risk. Data protection regulations such as the GDPR address such privacy
                 concerns, but practical solutions for the technical enforcement of these laws
                 are also required. Therefore, we introduce AMNESIA, a privacy-aware machine
                 learning model provisioning platform. AMNESIA is a holistic approach covering
                 all stages from data acquisition to model provisioning. This enables to control
                 which application may use which data for ML as well as to make models ``forget''
                 certain knowledge.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-03&engl=0}
}
@article {ART-2020-20,
author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
title = {{Data Lakes auf den Grund gegangen - Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
journal = {Datenbank Spektrum},
publisher = {Springer},
volume = {20},
pages = {57--69},
type = {Artikel in Zeitschrift},
month = {Januar},
year = {2020},
keywords = {Data Lakes; Analytics; Stand der Technik; Herausforderungen; Praxisbeispiel},
language = {Deutsch},
cr-category = {H.4 Information Systems Applications},
contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
internal-note = {NOTE(review): appears to duplicate ART-2020-11, ART-2020-10, and ART-2020-04 (same title, journal, volume, pages, year). ART-2020-04 carries the DOI and issue number and looks canonical -- consider consolidating; verify before removing any entry.},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten
zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten
Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen
Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen
Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige
Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder
unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte
eines global agierenden Industrieunternehmens genutzt, um bestehende
Herausforderungen zu identifizieren und Anforderungen an Data Lakes
herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum
Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung
abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken
bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende
Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept,
4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche
Realisierungsstrategie.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-20&engl=0}
}
@article {ART-2020-19,
author = {Christoph Stach and Julia Br{\"a}cker and Rebecca Eichler and Corinna Giebler and Cl{\'e}mentine Gritti},
title = {{How to Provide High-Utility Time Series Data in a Privacy-Aware Manner: A VAULT to Manage Time Series Data}},
journal = {International Journal On Advances in Security},
editor = {Hans-Joachim Hof and Birgit Gersbeck-Schierholz},
publisher = {IARIA},
volume = {13},
number = {3\&4},
pages = {1--21},
type = {Artikel in Zeitschrift},
month = {Dezember},
year = {2020},
issn = {1942-2636},
keywords = {Time Series Data; Privacy Filters; Aggregation; Interpolation; Smoothing; Information Emphasis; Noise; Data Quality; Authentication; Permission Model; Data Management},
language = {Englisch},
cr-category = {K.4.1 Computers and Society Public Policy Issues,
D.4.6 Operating Systems Security and Protection},
contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Smart Services enrich many aspects of our daily lives, such as in the Ambient
Assisted Living (AAL) domain, where the well-being of patients is automatically
monitored, and patients have more autonomy as a result. A key enabler for such
services is the Internet of Things (IoT). Using IoT-enabled devices, large
amounts of (partly private) data are continuously captured, which can be then
gathered and analyzed by Smart Services. Although these services bring many
conveniences, they therefore also pose a serious threat to privacy. In order to
provide the highest quality of service, they need access to as many data as
possible and even reveal more private information due to in-depth data
analyses. To ensure privacy, however, data minimization is required. Users are
thus forced to balance between service quality and privacy. Current IoT privacy
approaches do not reflect this discrepancy properly. Furthermore, as users are
often not experienced in the proper handling of privacy mechanisms, this leads
to an overly restrictive behavior. Instead of charging users with privacy
control, we introduce VAULT, a novel approach towards a privacy-aware
management of sensitive data. Since in the IoT time series data have a special
position, VAULT is particularly tailored to this kind of data. It attempts to
achieve the best possible tradeoff between service quality and privacy for each
user. To this end, VAULT manages the data and enables a demand-based and
privacy-aware provision of the data, by applying appropriate privacy filters
which fulfill not only the quality requirements of the Smart Services but also
the privacy requirements of users. In doing so, VAULT pursues a Privacy by
Design approach.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-19&engl=0}
}
@article {ART-2020-11,
author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
title = {{Data Lakes auf den Grund gegangen - Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
journal = {Datenbank Spektrum},
publisher = {Springer-Verlag},
volume = {20},
pages = {57--69},
type = {Artikel in Zeitschrift},
month = {Januar},
year = {2020},
keywords = {Data Lakes; Industrieerfahrung},
language = {Deutsch},
cr-category = {H.2.1 Database Management Logical Design},
contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
internal-note = {NOTE(review): appears to duplicate ART-2020-20, ART-2020-10, and ART-2020-04 (same title, journal, volume, pages, year); ART-2020-04 carries the DOI. Keyword corrected from garbled 'Industryerfahrung' to 'Industrieerfahrung' -- verify intended spelling.},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten
zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten
Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen
Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen
Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige
Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder
unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte
eines global agierenden Industrieunternehmens genutzt, um bestehende
Herausforderungen zu identifizieren und Anforderungen an Data Lakes
herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum
Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung
abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken
bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende
Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept,
4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche
Realisierungsstrategie.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-11&engl=0}
}
@article {ART-2020-10,
author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
title = {{Data Lakes auf den Grund gegangen - Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
journal = {Datenbank Spektrum},
publisher = {Springer-Verlag},
volume = {20},
pages = {57--69},
type = {Artikel in Zeitschrift},
month = {Januar},
year = {2020},
keywords = {Data Lakes; Industrieerfahrung},
language = {Deutsch},
cr-category = {H.2.1 Database Management Logical Design},
contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
internal-note = {NOTE(review): appears to duplicate ART-2020-20, ART-2020-11, and ART-2020-04 (same title, journal, volume, pages, year); ART-2020-04 carries the DOI. Keyword corrected from garbled 'Industryerfahrung' to 'Industrieerfahrung' -- verify intended spelling.},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten
zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten
Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen
Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen
Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige
Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder
unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte
eines global agierenden Industrieunternehmens genutzt, um bestehende
Herausforderungen zu identifizieren und Anforderungen an Data Lakes
herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum
Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung
abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken
bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende
Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept,
4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche
Realisierungsstrategie.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-10&engl=0}
}
@article {ART-2020-04,
author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
title = {{Data Lakes auf den Grund gegangen: Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
journal = {Datenbank-Spektrum},
publisher = {Springer},
volume = {20},
number = {1},
pages = {57--69},
type = {Artikel in Zeitschrift},
month = {Januar},
year = {2020},
doi = {10.1007/s13222-020-00332-0},
keywords = {Data Lake; Analytics; Stand der Technik; Herausforderungen; Praxisbeispiel},
language = {Deutsch},
cr-category = {A.1 General Literature, Introductory and Survey,
E.0 Data General},
ee = {https://rdcu.be/b0WM8},
contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
internal-note = {NOTE(review): most complete version of this article (has doi, number, ee); ART-2020-20, ART-2020-11, and ART-2020-10 appear to be duplicates of this work -- consider consolidating citations onto this key.},
department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten
zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten
Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen
Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen
Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige
Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder
unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte
eines global agierenden Industrieunternehmens genutzt, um bestehende
Herausforderungen zu identifizieren und Anforderungen an Data Lakes
herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum
Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung
abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken
bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende
Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept,
4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche
Realisierungsstrategie.},
url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-04&engl=0}
}