Publications AS: Bibliography 2022 BibTeX
% Conference paper in Springer's CCIS series, vol. 1603. The `doi` field carries the bare DOI that the `ee` resolver link points to.
@inproceedings{INPROC-2022-09,
  author      = {Jan Schneider and Pascal Hirmer},
  title       = {{Enhancing IoT Platforms for Autonomous Device Discovery and Selection}},
  booktitle   = {Service-Oriented Computing},
  editor      = {Johanna Barzen and Frank Leymann and Schahram Dustdar},
  publisher   = {Springer International Publishing},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  series      = {Communications in Computer and Information Science},
  volume      = {1603},
  pages       = {24--44},
  type        = {Conference Paper},
  month       = {October},
  year        = {2022},
  isbn        = {978-3-031-18304-1},
  doi         = {10.1007/978-3-031-18304-1_2},
  keywords    = {Internet of Things; IoT platforms; Device discovery},
  language    = {English},
  cr-category = {C.2.1 Network Architecture and Design,
C.2.4 Distributed Systems},
  ee          = {https://doi.org/10.1007/978-3-031-18304-1_2},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {The Internet of Things (IoT) encompasses a variety of technologies that enable
the formation of adaptive and flexible networks from heterogeneous devices.
Along with the rising number of applications, the amount of devices within IoT
ecosystems is constantly increasing. In order to cope with this inherent
complexity and to enable efficient administration and orchestration of devices,
IoT platforms have emerged in recent years. While many IoT platforms empower
users to define application logic for use cases and execute it within an
ecosystem, they typically rely on static device references, leading to huge
manual maintenance efforts and low robustness. In this paper, we present an
approach that allows IoT platforms to autonomously and reliably execute
pre-defined use cases by automatically discovering and selecting the most
suitable devices. It establishes loose coupling and hence does not impose major
technical constraints on the ecosystems in which it is operated.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-09&engl=1},
}
% Workshop paper (workshop held in conjunction with the 48th VLDB conference). No publisher is recorded, so the optional `publisher` field is omitted.
@inproceedings{INPROC-2022-08,
  author      = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
  title       = {{Establishing the Enterprise Data Marketplace: Characteristics, Architecture, and Challenges}},
  booktitle   = {Proceedings of the Workshop on Data Science for Data Marketplaces in Conjunction with the 48th International Conference on Very Large Data Bases},
  editor      = {Xiaohui Yu and Jian Pei},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  pages       = {1--12},
  type        = {Workshop Paper},
  month       = {September},
  year        = {2022},
  language    = {English},
  cr-category = {E.m Data Miscellaneous,
H.3.7 Digital Libraries,
H.4.m Information Systems Applications Miscellaneous},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Companies today have increasing amounts of data at their disposal, most of
which is not used, leaving the data value unexploited. In order to leverage the
data value, the data must be democratized, i.e., made available to the company
employees. In this context, the use of enterprise data marketplaces, platforms
for trading data within a company, are proposed. However, specifics of
enterprise data marketplaces and how these can be implemented have not been
investigated in literature so far. To shed light on these topics, we illustrate
the characteristics of an enterprise data marketplace and highlight according
marketplace requirements. We provide an enterprise data marketplace
architecture, discuss how it integrates into a company's system landscape and
present an enterprise data marketplace prototype. Finally, we examine
organizational and technical challenges which arise when operating a
marketplace in the enterprise context. In this paper, we thereby present the
enterprise data marketplace as a distinct marketplace type and provide the
basis for establishing it within a company.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-08&engl=1},
}
% Conference paper, 12th International Conference on the Internet of Things (ACM).
@inproceedings{INPROC-2022-07,
  author      = {Yunxuan Li and Pascal Hirmer and Christoph Stach and Bernhard Mitschang},
  title       = {{Ensuring Situation-Aware Privacy for Connected Vehicles}},
  booktitle   = {Proceedings of the 12th International Conference on the Internet of Things (IoT); Delft, Netherlands, November 7 - 10, 2022},
  editor      = {Evangelos Niforatos and Gerd Kortuem and Nirvana Meratnia and Josh Siegel and Florian Michahelles},
  address     = {New York, NY, USA},
  publisher   = {ACM},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  pages       = {135--138},
  type        = {Conference Paper},
  month       = {November},
  year        = {2022},
  isbn        = {978-1-4503-9665-3},
  doi         = {10.1145/3567445.3569163},
  keywords    = {Connected Vehicle; Situation-Awareness; Privacy-Preserving},
  language    = {English},
  cr-category = {K.4.1 Computers and Society Public Policy Issues,
K.6.5 Security and Protection},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {As technology advances in new sensors and software, modern vehicles become
increasingly intelligent. To date, connected vehicles can collect, process, and
share data with other entities in connected vehicle environments. However, in
terms of data collection and exchange, privacy becomes a central issue. It is
challenging to preserve privacy in connected vehicle environments when the
privacy demands of drivers could change from situation to situation even for
the same service. In this paper, we analyze the requirements for a
privacy-preserving system in connected vehicle environments with a focus on
situation-awareness and safety aspects. Based on the analysis, we propose a
novel situation-aware privacy-preserving framework for connected vehicles. Our
framework supports individual privacy protections for specific end-point
services and situation-aware privacy protections for different circumstances.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-07&engl=1},
}
% Conference paper, IRI 2022 (IEEE); no page range is recorded for this entry.
@inproceedings{INPROC-2022-06,
  author      = {Julian Ziegler and Peter Reimann and Christoph Schulz and Florian Keller and Bernhard Mitschang},
  title       = {{A Graph Structure to Discover Patterns in Unstructured Processes of Product Development}},
  booktitle   = {Proceedings of the 23rd International Conference on Information Reuse and Integration for Data Science (IRI 2022)},
  publisher   = {IEEE},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  type        = {Conference Paper},
  month       = {August},
  year        = {2022},
  keywords    = {Process Discovery; Unstructured Processes; Process Patterns; Graph Data; Frequent Subgraph Mining},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {A well-known information reuse approach is to exploit event logs for process
discovery and optimization. However, process discovery is rarely used for
product development projects. This is because information systems in product
development, e. g., Product-Lifecycle-Management (PLM) systems, do not provide
the event logs required by process discovery algorithms. Additionally, existing
algorithms struggle with development projects, as these are unstructured and
rich in variety. In this paper, we propose a novel approach to process
discovery in order to make it applicable and tailored to product development
projects. Instead of using flat event logs, we provide a graph-based data
structure that is able to represent both activities and data of product
development projects with the dataflow between activities. Based on this
structure, we can leverage provenance available in PLM systems. Furthermore, we
may use frequent subgraph mining to discover process patterns. Such patterns
are well suited to describe different variants and common sub-processes of
unstructured processes. Using a prototype, we evaluate this approach and
successfully discover prevailing patterns. These patterns may be used by
engineers to support their decision-making or help improve the execution of
development projects.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-06&engl=1},
}
% Conference paper (Springer). `doi` holds the bare DOI; `isbn` is the book identifier embedded in that DOI - TODO confirm against the publisher's record.
% NOTE(review): no `series`/`volume` is given because the proceedings series could not be determined from this file - TODO confirm and add.
% The title dash is written as LaTeX `---` so the entry stays ASCII like the accent escapes used elsewhere in this file.
@inproceedings{INPROC-2022-05,
  author      = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
  title       = {{Data Shopping --- How an Enterprise Data Marketplace Supports Data Democratization in Companies}},
  booktitle   = {Proceedings of the 34th International Conference on Intelligent Information Systems},
  editor      = {Jochen De Weerdt and Artem Polyvyanyy},
  address     = {Stuttgart},
  publisher   = {Springer International Publishing},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  pages       = {19--26},
  type        = {Conference Paper},
  month       = {May},
  year        = {2022},
  isbn        = {978-3-031-07481-3},
  doi         = {10.1007/978-3-031-07481-3_3},
  keywords    = {Data Marketplace; Data Sharing; Data Democratization},
  language    = {English},
  cr-category = {H.0 Information Systems General},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {To exploit the company's data value, employees must be able to find, understand
and access it. The process of making corporate data available to the majority
of the company's employees is referred to as data democratization. In this
work, we present the current state and challenges of data democratization in
companies, derived from a comprehensive literature study and expert interviews
we conducted with a manufacturer. In this context a data consumer's journey is
presented that reflects the required steps, tool types and roles for finding,
understanding and accessing data in addition to revealing three data
democratization challenges. To address these challenges we propose the use of
an enterprise data marketplace, a novel type of information system for sharing
data within the company. We developed a prototype based on which a suitability
assessment of a data marketplace yields an improved consumer journey and
demonstrates that the marketplace addresses the data democratization challenges
and consequently, shows that the marketplace is suited for realizing data
democratization.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-05&engl=1},
}
% Conference paper, ICEIS 2022 (SciTePress); no page range is recorded for this entry.
% NOTE(review): the abstract's "which may cause a concept shift" supplies a verb missing from the export - TODO confirm wording against the published abstract.
@inproceedings{INPROC-2022-03,
  author      = {Marco Spie{\ss} and Peter Reimann and Christian Weber and Bernhard Mitschang},
  title       = {{Analysis of Incremental Learning and Windowing to handle Combined Dataset Shifts on Binary Classification for Product Failure Prediction}},
  booktitle   = {Proceedings of the 24th International Conference on Enterprise Information Systems (ICEIS 2022)},
  publisher   = {SciTePress},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  type        = {Conference Paper},
  month       = {April},
  year        = {2022},
  keywords    = {Binary Classification; Dataset Shift; Incremental Learning; Product Failure Prediction; Windowing},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Dataset Shifts (DSS) are known to cause poor predictive performance in
supervised machine learning tasks. We present a challenging binary
classification task for a real-world use case of product failure prediction.
The target is to predict whether a product, e. g., a truck may fail during the
warranty period. However, building a satisfactory classifier is difficult,
because the characteristics of underlying training data entail two kinds of
DSS. First, the distribution of product configurations may change over time,
leading to a covariate shift. Second, products gradually fail at different
points in time, so that the labels in training data may change, which may cause a
concept shift. Further, both DSS show a trade-off relationship, i. e.,
addressing one of them may imply negative impacts on the other one. We discuss
the results of an experimental study to investigate how different approaches to
addressing DSS perform when they are faced with both a covariate and a concept
shift. Thereby, we prove that existing approaches, e. g., incremental learning
and windowing, especially suffer from the trade-off between both DSS.
Nevertheless, we come up with a solution for a data-driven classifier that
yields better results than a baseline solution that does not address DSS.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-03&engl=1},
}
% Conference paper, CIRP CMS 2022 (Procedia CIRP, Elsevier); no page range is recorded for this entry.
@inproceedings{INPROC-2022-02,
  author      = {Florian Hermann and Bowen Chen and Golsa Ghasemi and Valentin Stegmaier and Thomas Ackermann and Peter Reimann and Sabrina Vogt and Thomas Graf and Michael Weyrich},
  title       = {{A Digital Twin Approach for the Prediction of the Geometry of Single Tracks Produced by Laser Metal Deposition}},
  booktitle   = {Procedia CIRP: Proceedings of the 55th CIRP Conference on Manufacturing Systems (CIRP CMS 2022)},
  publisher   = {Elsevier BV},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  type        = {Conference Paper},
  month       = {June},
  year        = {2022},
  keywords    = {Laser metal deposition; Software-defined manufacturing; Digital Twin; Asset Administration Shell},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Flexible manufacturing processes such as laser metal deposition have a high
potential for a production solely defined by software to cope with the current
challenges of production systems. The determination of suitable machine
parameters for the production of novel materials and geometries however
requires extensive experimental effort. Existing simulative approaches do not
offer sufficient accuracy to predict the relevant machine parameters in a
satisfactory way. This paper presents a new concept, in which we apply a
digital twin to provide a step towards a fully software-defined and predictable
laser metal deposition process. The presented concept includes relevant data of
the machines as well as data-driven machine learning models and physics-based
simulation models. This enables a more reliable prediction of geometries of
single tracks which was validated on a laser metal deposition machine.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-02&engl=1},
}
% Conference paper, 37th ACM/SIGAPP Symposium On Applied Computing (ACM).
@inproceedings{INPROC-2022-01,
  author      = {Christoph Stach and Cl{\'e}mentine Gritti and Dennis Przytarski and Bernhard Mitschang},
  title       = {{Can Blockchains and Data Privacy Laws be Reconciled? A Fundamental Study of How Privacy-Aware Blockchains are Feasible}},
  booktitle   = {Proceedings of the 37th ACM/SIGAPP Symposium On Applied Computing},
  editor      = {Jiman Hong and Miroslav Bures and Ronald Petrlic and Christoph Sorge},
  address     = {Brno},
  publisher   = {ACM},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  pages       = {1--10},
  type        = {Conference Paper},
  month       = {April},
  year        = {2022},
  isbn        = {978-1-4503-8713-2},
  doi         = {10.1145/3477314.3506986},
  keywords    = {blockchains; immutable; tamper-proof; GDPR; privacy assessment},
  language    = {English},
  cr-category = {K.4.1 Computers and Society Public Policy Issues,
K.6.5 Security and Protection},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Due to the advancing digitalization, the importance of data is constantly
increasing. Application domains such as smart cars, smart cities, or smart
healthcare rely on the permanent availability of large amounts of data to all
parties involved. As a result, the value of data increases, making it a
lucrative target for cyber-attacks. Particularly when human lives depend on the
data, additional protection measures are therefore important for data
management and provision. Blockchains, i.e., decentralized, immutable, and
tamper-proof data stores, are becoming increasingly popular for this purpose.
Yet, from a data protection perspective, the immutable and tamper-proof
properties of blockchains pose a privacy concern. In this paper, we therefore
investigate whether blockchains are in compliance with the General Data
Protection Regulation (GDPR) if personal data are involved. To this end, we
elaborate which articles of the GDPR are relevant in this regard and present
technical solutions for those legal requirements with which blockchains are in
conflict. We further identify open research questions that need to be addressed
in order to achieve a privacy-by-design blockchain system.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-01&engl=1},
}
% Journal editorial, Future Internet 14(11), MDPI.
@article{ART-2022-09,
  author      = {Christoph Stach},
  title       = {{Editorial to the Special Issue on Security and Privacy in Blockchains and the IoT}},
  journal     = {Future Internet},
  publisher   = {MDPI},
  volume      = {14},
  number      = {11},
  pages       = {1--4},
  type        = {Article in Journal},
  month       = {November},
  year        = {2022},
  issn        = {1999-5903},
  doi         = {10.3390/fi14110317},
  language    = {English},
  cr-category = {D.4.6 Operating Systems Security and Protection,
K.4.1 Computers and Society Public Policy Issues,
K.6.5 Security and Protection},
  ee          = {https://www.mdpi.com/1999-5903/14/11/317/htm},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {In this day and age, data have become an immensely valuable resource. They are
the key driver that puts the smart into smart services. This is fundamentally
fueled by two technological achievements, namely the Internet of Things (IoT),
which enables continuous and comprehensive collection of all kinds of data, and
blockchain technologies, which provide secure data management and exchange. In
addition to those information security measures, however, data privacy
solutions are also required to protect the involved sensitive data. In this
Special Issue, eight research papers address security and privacy challenges
when dealing with blockchain technologies and the IoT. Concerning the IoT,
solutions are presented on how IoT group communication can be secured and how
trust within IoT applications can be increased. In the context of blockchain
technologies, approaches are introduced on how the capabilities of query
processing can be enhanced and how a proof-of-work consensus protocol can be
efficiently applied in IoT environments. Furthermore, it is discussed how
blockchain technologies can be used in IoT environments to control access to
confidential IoT data as well as to enable privacy-aware data sharing. Finally,
two reviews give an overview of the state of the art in in-app activity
recognition based on convolutional neural networks and the prospects for
blockchain technology applications in ambient assisted living.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-09&engl=1},
}
% Journal article, ACM SIGAPP Applied Computing Review 22(3); no DOI is recorded for this entry.
@article{ART-2022-08,
  author      = {Christoph Stach and Cl{\'e}mentine Gritti and Dennis Przytarski and Bernhard Mitschang},
  title       = {{Assessment and Treatment of Privacy Issues in Blockchain Systems}},
  journal     = {ACM SIGAPP Applied Computing Review},
  publisher   = {ACM},
  volume      = {22},
  number      = {3},
  pages       = {5--24},
  type        = {Article in Journal},
  month       = {September},
  year        = {2022},
  issn        = {1559-6915},
  keywords    = {blockchain; decentralized; immutable; tamper-proof; GDPR; privacy assessment; data purging; data authentication; permission control; privacy filters; privacy control environment},
  language    = {English},
  cr-category = {K.4.1 Computers and Society Public Policy Issues,
K.6.5 Security and Protection},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {The ability to capture and quantify any aspect of daily life via sensors,
enabled by the Internet of Things (IoT), data have become one of the most
important resources of the 21st century. However, the high value of data also
renders data an appealing target for criminals. Two key protection goals when
dealing with data are therefore to maintain their permanent availability and to
ensure their integrity. Blockchain technology provides a means of data
protection that addresses both of these objectives. On that account,
blockchains are becoming increasingly popular for the management of critical
data. As blockchains are operated in a decentralized manner, they are not only
protected against failures, but it is also ensured that neither party has sole
control over the managed data. Furthermore, blockchains are immutable and
tamper-proof data stores, whereby data integrity is guaranteed. While these
properties are preferable from a data security perspective, they also pose a
threat to privacy and confidentiality, as data cannot be concealed, rectified,
or deleted once they are added to the blockchain.
In this paper, we therefore investigate which features of the blockchain pose
an inherent privacy threat when dealing with personal or confidential data. To
this end, we consider to what extent blockchains are in compliance with
applicable data protection laws, namely the European General Data Protection
Regulation (GDPR). Based on our identified key issues, we assess which concepts
and technical measures can be leveraged to address these issues in order to
create a privacy-by-design blockchain system.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-08&engl=1},
}
% Journal article, Future Internet 14(11), MDPI.
@article{ART-2022-07,
  author      = {Christoph Stach and Cl{\'e}mentine Gritti and Julia Br{\"a}cker and Michael Behringer and Bernhard Mitschang},
  title       = {{Protecting Sensitive Data in the Information Age: State of the Art and Future Prospects}},
  journal     = {Future Internet},
  publisher   = {MDPI},
  volume      = {14},
  number      = {11},
  pages       = {1--42},
  type        = {Article in Journal},
  month       = {October},
  year        = {2022},
  issn        = {1999-5903},
  doi         = {10.3390/fi14110302},
  keywords    = {smart service; privacy techniques; location-based services; health services; voice-controlled digital assistants; image analysis; food analysis; recommender systems; DNA sequence classification},
  language    = {English},
  cr-category = {K.4.1 Computers and Society Public Policy Issues,
K.6.5 Security and Protection},
  ee          = {https://www.mdpi.com/1999-5903/14/11/302/htm},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {The present information age is characterized by an ever-increasing
digitalization. Smart devices quantify our entire lives. These collected data
provide the foundation for data-driven services called smart services. They are
able to adapt to a given context and thus tailor their functionalities to the
user's needs. It is therefore not surprising that their main resource, namely
data, is nowadays a valuable commodity that can also be traded. However, this
trend does not only have positive sides, as the gathered data reveal a lot of
information about various data subjects. To prevent uncontrolled insights into
private or confidential matters, data protection laws restrict the processing
of sensitive data. One key factor in this regard is user-friendly privacy
mechanisms. In this paper, we therefore assess current state-of-the-art privacy
mechanisms. To this end, we initially identify forms of data processing applied
by smart services. We then discuss privacy mechanisms suited for these use
cases. Our findings reveal that current state-of-the-art privacy mechanisms
provide good protection in principle, but there is no compelling
one-size-fits-all privacy approach. This leads to further questions regarding
the practicality of these mechanisms, which we present in the form of seven
thought-provoking propositions.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-07&engl=1},
}
% Journal article, Journal of Cybersecurity and Privacy 2(3), MDPI.
% The title dash is written as LaTeX `---` so the entry stays ASCII like the accent escapes used elsewhere in this file.
@article{ART-2022-06,
  author      = {Christoph Stach and Michael Behringer and Julia Br{\"a}cker and Cl{\'e}mentine Gritti and Bernhard Mitschang},
  title       = {{SMARTEN --- A Sample-Based Approach towards Privacy-Friendly Data Refinement}},
  journal     = {Journal of Cybersecurity and Privacy},
  publisher   = {MDPI},
  volume      = {2},
  number      = {3},
  pages       = {606--628},
  type        = {Article in Journal},
  month       = {August},
  year        = {2022},
  issn        = {2624-800X},
  doi         = {10.3390/jcp2030031},
  keywords    = {privacy; data refinement; data cleansing; data transformation; human-in-the-loop},
  language    = {English},
  cr-category = {K.4.1 Computers and Society Public Policy Issues,
K.6.5 Security and Protection},
  ee          = {https://www.mdpi.com/2624-800X/2/3/31/htm},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Two factors are crucial for the effective operation of modern-day smart
services: Initially, IoT-enabled technologies have to capture and combine huge
amounts of data on data subjects. Then, all these data have to be processed
exhaustively by means of techniques from the area of big data analytics. With
regard to the latter, thorough data refinement in terms of data cleansing and
data transformation is the decisive cornerstone. Studies show that data
refinement reaches its full potential only by involving domain experts in the
process. However, this means that these experts need full insight into the data
in order to be able to identify and resolve any issues therein, e.g., by
correcting or removing inaccurate, incorrect, or irrelevant data records. In
particular for sensitive data (e.g., private data or confidential data), this
poses a problem, since these data are thereby disclosed to third parties such
as domain experts. To this end, we introduce SMARTEN, a sample-based approach
towards privacy-friendly data refinement to smarten up big data analytics and
smart services. SMARTEN applies a revised data refinement process that fully
involves domain experts in data pre-processing but does not expose any
sensitive data to them or any other third-party. To achieve this, domain
experts obtain a representative sample of the entire data set that meets all
privacy policies and confidentiality guidelines. Based on this sample, domain
experts define data cleaning and transformation steps. Subsequently, these
steps are converted into executable data refinement rules and applied to the
entire data set. Domain experts can request further samples and define further
rules until the data quality required for the intended use case is reached.
Evaluation results confirm that our approach is effective in terms of both data
quality and data privacy.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-06&engl=1},
}
% Journal article, Journal of Data Intelligence 3(3), Rinton Press; per its abstract, an extended version of an earlier conference paper.
@article{ART-2022-01,
  author      = {Christoph Stach and Julia Br{\"a}cker and Rebecca Eichler and Corinna Giebler and Bernhard Mitschang},
  title       = {{Simplified Specification of Data Requirements for Demand-Actuated Big Data Refinement}},
  journal     = {Journal of Data Intelligence},
  publisher   = {Rinton Press},
  volume      = {3},
  number      = {3},
  pages       = {366--400},
  type        = {Article in Journal},
  month       = {August},
  year        = {2022},
  issn        = {2577-610X},
  keywords    = {data pre-processing; data transformation; knowledge modeling; ontology; data management; Data Lakes; zone model; food analysis},
  language    = {English},
  cr-category = {H.2.7 Database Administration,
E.2 Data Storage Representations,
H.3.3 Information Search and Retrieval,
H.2.8 Database Applications},
  contact     = {Senden Sie eine E-Mail an christoph.stach@ipvs.uni-stuttgart.de.},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Data have become one of the most valuable resources in modern society. Due to
increasing digitalization and the growing prevalence of the Internet of Things,
it is possible to capture data on any aspect of today's life. Similar to
physical resources, data have to be refined before they can become a profitable
asset. However, such data preparation entails completely novel challenges: For
instance, data are not consumed when being processed, whereby the volume of
available data that needs to be managed increases steadily. Furthermore, the
data preparation has to be tailored to the intended use case in order to
achieve an optimal outcome. This, however, requires the knowledge of domain
experts. Since such experts are typically not IT experts, they need tools that
enable them to specify the data requirements of their use cases in a
user-friendly manner. The goal of this data preparation is to provide any
emerging use case with demand-actuated data.
With this in mind, we designed a tailorable data preparation zone for Data
Lakes called BARENTS. It provides a simplified method for domain experts to
specify how data must be pre-processed for their use cases, and these data
preparation steps are then applied automatically. The data requirements are
specified by means of an ontology-based method which is comprehensible to
non-IT experts. Data preparation and provisioning are realized
resource-efficient by implementing BARENTS as a dedicated zone for Data Lakes.
This way, BARENTS is seamlessly embeddable into established Big Data
infrastructures.
This article is an extended and revised version of the conference paper
``Demand-Driven Data Provisioning in Data Lakes: BARENTS - A Tailorable Data
Preparation Zone'' by Stach et al. In comparison to our original conference
paper, we take a more detailed look at related work in the paper at hand. The
emphasis of this extended and revised version, however, is on strategies to
improve the performance of BARENTS and enhance its functionality. To this end,
we discuss in-depth implementation details of our prototype and introduce a
novel recommender system in BARENTS that assists users in specifying data
preparation steps.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-01&engl=1},
}