Publications AS: Bibliography 2023 BibTeX
@inproceedings{INPROC-2023-07,
  author      = {Andrea Fieschi and Yunxuan Li and Pascal Hirmer and Christoph Stach and Bernhard Mitschang},
  title       = {{Privacy in Connected Vehicles: Perspectives of Drivers and Car Manufacturers}},
  booktitle   = {Service-Oriented Computing: 17th Symposium and Summer School, SummerSOC 2023, Heraklion, Crete, Greece, June 25 -- July 1, 2023, Revised Selected Papers},
  editor      = {Marco Aiello and Johanna Barzen and Schahram Dustdar and Frank Leymann},
  address     = {Cham},
  publisher   = {Springer Nature Switzerland},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  series      = {Communications in Computer and Information Science},
  volume      = {1847},
  pages       = {59--68},
  type        = {Conference Paper},
  month       = {October},
  year        = {2023},
  isbn        = {978-3-031-45727-2},
  doi         = {10.1007/978-3-031-45728-9_4},
  keywords    = {Connected Vehicles; Privacy; Anonymization},
  language    = {English},
  cr-category = {K.4.1 Computers and Society Public Policy Issues},
  contact     = {Senden Sie eine E-Mail an Andrea Fieschi (Andrea.Fieschi@ipvs.uni-stuttgart.de) oder Yunxuan Li (Yunxuan.Li@ipvs.uni-stuttgart.de).},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {The digital revolution has led to significant technological advancements in the
automotive industry, enabling vehicles to process and share information with
other vehicles and the cloud. However, as data sharing becomes more prevalent,
privacy protection has become an essential issue. In this paper, we explore
various privacy challenges regarding different perspectives of drivers and car
manufacturers. We also propose general approaches to overcome these challenges
with respect to their individual needs. Finally, we highlight the importance of
collaboration between drivers and car manufacturers to establish trust and
achieve better privacy protection.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-07&engl=1}
}
@inproceedings{INPROC-2023-06,
  author      = {Jan Schneider and Christoph Gr{\"o}ger and Arnold Lutsch and Holger Schwarz and Bernhard Mitschang},
  title       = {{Assessing the Lakehouse: Analysis, Requirements and Definition}},
  booktitle   = {Proceedings of the 25th International Conference on Enterprise Information Systems, ICEIS 2023, Volume 1, Prague, Czech Republic, April 24-26, 2023},
  editor      = {Joaquim Filipe and Michal Smialek and Alexander Brodsky and Slimane Hammoudi},
  address     = {Prague},
  publisher   = {SciTePress},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  pages       = {44--56},
  type        = {Conference Paper},
  month       = {May},
  year        = {2023},
  isbn        = {978-989-758-648-4},
  issn        = {2184-4992},
  doi         = {10.5220/0011840500003467},
  keywords    = {Lakehouse; Data Warehouse; Data Lake; Data Management; Data Analytics},
  language    = {English},
  cr-category = {H.2.4 Database Management Systems,
H.2.7 Database Administration,
H.2.8 Database Applications},
  ee          = {https://www.scitepress.org/PublicationsDetail.aspx?ID=9ydI3Lyl2Fk=,
https://doi.org/10.5220/0011840500003467},
  contact     = {jan.schneider@ipvs.uni-stuttgart.de},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {The digital transformation opens new opportunities for enterprises to optimize
their business processes by applying data-driven analysis techniques. For
storing and organizing the required huge amounts of data, different types of
data platforms have been employed in the past, with data warehouses and data
lakes being the most prominent ones. Since they possess rather contrary
characteristics and address different types of analytics, companies typically
utilize both of them, leading to complex architectures with replicated data and
slow analytical processes. To counter these issues, vendors have recently been
making efforts to break the boundaries and to combine features of both worlds
into integrated data platforms. Such systems are commonly called lakehouses and
promise to simplify enterprise analytics architectures by serving all kinds of
analytical workloads from a single platform. However, it remains unclear how
lakehouses can be characterized, since existing definitions focus almost
arbitrarily on individual architectural or functional aspects and are often
driven by marketing. In this paper, we assess prevalent definitions for
lakehouses and finally propose a new definition, from which several technical
requirements for lakehouses are derived. We apply these requirements to several
popular data management tools, such as Delta Lake, Snowflake and Dremio in
order to evaluate whether they enable the construction of lakehouses.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-06&engl=1}
}
@inproceedings{INPROC-2023-05,
  author      = {Thomas Ackermann and Robert Miehe and Peter Reimann and Bernhard Mitschang and Ralf Takors and Thomas Bauernhansl},
  title       = {{A Cross-Disciplinary Training Concept for Future Technologists in the Dawn of Biointelligent Production Systems}},
  booktitle   = {Procedia CIRP: Proceedings of 13th CIRP Conference on Learning Factories (CIRP CLF)},
  publisher   = {Elsevier BV},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  type        = {Conference Paper},
  month       = {May},
  year        = {2023},
  keywords    = {Biointelligent systems; Biological transformation; Converging technologies; Qualification},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Biologicalization is considered one of the most important transformation
processes in industry alongside digitalization. This work presents a
qualification concept within the Stuttgart Biointelligent Manufacturing
Framework (BioMEFUS), which is intended to provide skills and experiences at
the intersections between manufacturing and process engineering, computer
science and life science. Life cycle management, production methods and
engineering of components towards the development and implementation of
biointelligent systems are considered as the major engineering platforms of the
framework. The qualification concept is developed for early stage researchers
(ESRs) at the doctorate stage. It provides a mapping of individual research
projects in the field of biointelligent production systems and contains
subject-related and methodological building blocks for the formation of future
experts and decision-makers in the course of biological transformation.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-05&engl=1}
}
@inproceedings{INPROC-2023-04,
  author      = {Julius Voggesberger and Peter Reimann and Bernhard Mitschang},
  title       = {{Towards the Automatic Creation of Optimized Classifier Ensembles}},
  booktitle   = {Proceedings of the 25th International Conference on Enterprise Information Systems (ICEIS 2023)},
  publisher   = {SciTePress},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  pages       = {614--621},
  type        = {Conference Paper},
  month       = {April},
  year        = {2023},
  keywords    = {Classifier Ensembles; Classifier Diversity; Decision Fusion; AutoML; Machine Learning},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Classifier ensemble algorithms allow for the creation of combined machine
learning models that are more accurate and generalizable than individual
classifiers. However, creating such an ensemble is complex, as several
requirements must be fulfilled. An expert has to select multiple classifiers
that are both accurate and diverse. In addition, a decision fusion algorithm
must be selected to combine the predictions of these classifiers into a
consensus decision. Satisfying these requirements is challenging even for
experts, as it requires a lot of time and knowledge. In this position paper, we
propose to automate the creation of classifier ensembles. While there already
exist several frameworks that automatically create multiple classifiers, none
of them meet all requirements to build optimized ensembles based on these
individual classifiers. Hence, we introduce and compare three basic approaches
that tackle this challenge. Based on the comparison results, we propose one of
the approaches that best meets the requirements to lay the foundation for
future work.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-04&engl=1}
}
@inproceedings{INPROC-2023-03,
  author      = {Yannick Wilhelm and Peter Reimann and Wolfgang Gauchel and Steffen Klein and Bernhard Mitschang},
  title       = {{PUSION- A Generic and Automated Framework for Decision Fusion}},
  booktitle   = {Proceedings of the 39th IEEE International Conference on Data Engineering (ICDE 2023)},
  publisher   = {IEEE},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  type        = {Conference Paper},
  month       = {April},
  year        = {2023},
  keywords    = {Classifier ensembles; decision fusion; automated decision fusion; hybrid fault diagnosis},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Combining two or more classifiers into an ensemble and fusing the individual
classifier decisions to a consensus decision can improve the accuracy for a
classification problem. The classification improvement of the fusion result
depends on numerous factors, such as the data set, the combination scenario,
the decision fusion algorithm, as well as the prediction accuracies and
diversity of the multiple classifiers to be combined. Due to these factors, the
best decision fusion algorithm for a given decision fusion problem cannot be
generally determined in advance. In order to support the user in combining
classifiers and to achieve the best possible fusion result, we propose the
PUSION (Python Universal fuSION) framework, a novel generic and automated
framework for decision fusion of classifiers. The framework includes 14
decision fusion algorithms and covers a total of eight different combination
scenarios for both multi-class and multi-label classification problems. The
introduced concept of AutoFusion detects the combination scenario for a given
use case, automatically selects the applicable decision fusion algorithms and
returns the decision fusion algorithm that leads to the best fusion result. The
framework is evaluated with two real-world case studies in the field of fault
diagnosis. In both case studies, the consensus decision of multiple classifiers
and heterogeneous fault diagnosis methods significantly increased the overall
classification accuracy. Our evaluation results show that our framework is of
practical relevance and reliably finds the best performing decision fusion
algorithm for a given combination task.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-03&engl=1}
}
@inproceedings{INPROC-2023-02,
  author      = {Dennis Treder-Tschechlov and Peter Reimann and Holger Schwarz and Bernhard Mitschang},
  title       = {{Approach to Synthetic Data Generation for Imbalanced Multi-class Problems with Heterogeneous Groups}},
  booktitle   = {Tagungsband der 20. Fachtagung Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2023)},
  publisher   = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  series      = {Lecture Notes in Informatics (LNI)},
  pages       = {329--351},
  type        = {Conference Paper},
  month       = {March},
  year        = {2023},
  keywords    = {Machine learning; classification; data generation; real-world data characteristics},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  ee          = {https://dl.gi.de/bitstream/handle/20.500.12116/40320/B3-5.pdf?},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {To benchmark novel classification algorithms, these algorithms should be
evaluated on data with characteristics that also appear in real-world use
cases. Important data characteristics that often lead to challenges for
classification approaches are multi-class imbalance and heterogeneous groups.
Heterogeneous groups are sets of real-world entities, where the classification
patterns may vary among different groups and where the groups are typically
imbalanced in the data. Real-world data that comprise these characteristics are
usually not publicly available, e.g., because they constitute sensitive patient
information or due to privacy concerns. Further, the manifestations of the
characteristics cannot be controlled specifically on real-world data. A more
rigorous approach is to synthetically generate data such that different
manifestations of the characteristics can be controlled as well. However,
existing data generators are not able to generate data that feature both data
characteristics, i.e., multi-class imbalance and heterogeneous groups. In this
paper, we propose an approach that fills this gap as it allows to synthetically
generate data that exhibit both characteristics. We make use of a taxonomy
model that organizes real-world entities in domain-specific heterogeneous
groups to generate data reflecting the characteristics of these groups.
Further, we incorporate probability distributions to reflect the imbalances of
multiple classes and groups from real-world use cases. The evaluation shows
that our approach can generate data that feature the data characteristics
multi-class imbalance and heterogeneous groups and that it allows to control
different manifestations of these characteristics.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-02&engl=1}
}
@inproceedings{INPROC-2023-01,
  author      = {Yunxuan Li and Pascal Hirmer and Christoph Stach},
  title       = {{CV-Priv: Towards a Context Model for Privacy Policy Creation for Connected Vehicles}},
  booktitle   = {Proceedings of the 21st International Conference on Pervasive Computing and Communications Workshops},
  publisher   = {IEEE},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  pages       = {1--6},
  type        = {Conference Paper},
  month       = {March},
  year        = {2023},
  keywords    = {Context Modeling; Ontology; Privacy Policy; Privacy-Preserving; Connected Vehicle},
  language    = {English},
  cr-category = {K.4.1 Computers and Society Public Policy Issues,
K.6.5 Security and Protection},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Connected vehicles are becoming progressively capable of collecting,
processing, and sharing data, which leads to a growing concern about privacy in
the automotive domain. However, research has shown that although users are
highly concerned about their privacy, they usually find it difficult to
configure privacy settings. This is because the privacy context, which
represents the privacy circumstance a driver faces during the privacy policy
creation, is highly complex. To create custom privacy policies, drivers must
consider the privacy context information, such as what service is requesting
data from which vehicle sensor, or what privacy countermeasures are available
for vehicles and satisfy certain privacy properties. This easily leads to
information and choice overhead. Therefore, we propose the novel ontology-based
privacy context model, CV-Priv, for the modeling of such privacy context
information for creating custom privacy policies in the automotive domain. In
this paper, we analyze the design requirements for a privacy context model
based on challenges drivers might face during the privacy policy creation
phase. We also demonstrate how CV-Priv can be utilized by context-aware systems
to help drivers transform their fuzzy privacy requirements into sound privacy
policies.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-01&engl=1}
}
@article{ART-2023-07,
  author      = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
  title       = {{Introducing the enterprise data marketplace: a platform for democratizing company data}},
  journal     = {Journal of Big Data},
  publisher   = {Springer Nature},
  volume      = {10},
  pages       = {1--38},
  type        = {Article in Journal},
  month       = {November},
  year        = {2023},
  issn        = {2196-1115},
  doi         = {10.1186/s40537-023-00843-z},
  keywords    = {Data Catalog; Data Democratization; Data Market; Data Sharing; Enterprise Data Marketplace; Metadata Management},
  language    = {English},
  cr-category = {E.m Data Miscellaneous,
H.3.7 Digital Libraries,
H.4.m Information Systems Applications Miscellaneous},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {In this big data era, multitudes of data are generated and collected which
contain the potential to gain new insights, e.g., for enhancing business
models. To leverage this potential through, e.g., data science and analytics
projects, the data must be made available. In this context, data marketplaces
are used as platforms to facilitate the exchange and thus, the provisioning of
data and data-related services. Data marketplaces are mainly studied for the
exchange of data between organizations, i.e., as external data marketplaces.
Yet, the data collected within a company also has the potential to provide
valuable insights for this same company, for instance to optimize business
processes. Studies indicate, however, that a significant amount of data within
companies remains unused. In this sense, it is proposed to employ an Enterprise
Data Marketplace, a platform to democratize data within a company among its
employees. Specifics of the Enterprise Data Marketplace, how it can be
implemented or how it makes data available throughout a variety of systems like
data lakes has not been investigated in literature so far. Therefore, we
present the characteristics and requirements of this kind of marketplace. We
also distinguish it from other tools like data catalogs, provide a platform
architecture and highlight how it integrates with the company's system
landscape. The presented concepts are demonstrated through an Enterprise Data
Marketplace prototype and an experiment reveals that this marketplace
significantly improves the data consumer workflows in terms of efficiency and
complexity. This paper is based on several interdisciplinary works combining
comprehensive research with practical experience from an industrial
perspective. We therefore present the Enterprise Data Marketplace as a distinct
marketplace type and provide the basis for establishing it within a company.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-07&engl=1}
}
@article{ART-2023-06,
  author      = {Christoph Stach and Cl{\'e}mentine Gritti},
  title       = {{Editorial to the Special Issue on Security and Privacy in Blockchains and the IoT Volume II}},
  journal     = {Future Internet},
  address     = {Basel, Schweiz},
  publisher   = {MDPI},
  volume      = {15},
  number      = {8},
  pages       = {1--7},
  type        = {Article in Journal},
  month       = {August},
  year        = {2023},
  issn        = {1999-5903},
  doi         = {10.3390/fi15080272},
  language    = {English},
  cr-category = {D.4.6 Operating Systems Security and Protection,
K.4.1 Computers and Society Public Policy Issues,
K.6.5 Security and Protection},
  ee          = {https://www.mdpi.com/1999-5903/15/8/272/htm},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {In this age of data-driven transformation, where the fusion of blockchain
technologies and the Internet of Things (IoT) is shaping the fabric of our
digital society, the need for security and privacy has never been more
important. This Special Issue delves into the intricate confluence of these two
disruptive forces and provides a comprehensive overview of security and privacy
aspects in this regard. Focusing on protection goals such as confidentiality,
integrity, availability, and privacy, this compilation encapsulates the essence
of these multi-layered challenges. Ranging from complex data-driven
applications and smart services to novel approaches that enhance security and
privacy in the context of blockchain technologies and the IoT, the research
articles and literature reviews presented here offer a sophisticated mesh of
insights. Innovative solutions are highlighted from a variety of perspectives,
and challenges such as secure data transmission, confidential communication,
and tamper-proof data storage are explored.
In this way, this Special Issue is a beacon for practitioners, researchers, and
technology enthusiasts. Developers seeking to harness the potential of
blockchain technology and IoT find rich insights while users get a
comprehensive overview of the latest research and trends. The symphony of
interdisciplinary knowledge presented here creates a harmonious blend of theory
and practice, highlighting the intricate interdependencies between
technological advances and the need for security and privacy.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-06&engl=1}
}
@article{ART-2023-05,
  author      = {Christoph Stach and Rebecca Eichler and Simone Schmidt},
  title       = {{A Recommender Approach to Enable Effective and Efficient Self-Service Analytics in Data Lakes}},
  journal     = {Datenbank-Spektrum},
  publisher   = {Springer Nature},
  volume      = {23},
  number      = {2},
  pages       = {123--132},
  type        = {Article in Journal},
  month       = {June},
  year        = {2023},
  issn        = {1618-2162},
  doi         = {10.1007/s13222-023-00443-4},
  keywords    = {Data Lake; Data Preparation; Data Pre-Processing; Data Refinement; Recommender; Self-Service Analytics},
  language    = {English},
  cr-category = {H.2.7 Database Administration,
E.2 Data Storage Representations,
H.3.3 Information Search and Retrieval,
H.2.8 Database Applications},
  contact     = {Senden Sie eine E-Mail an christoph.stach@ipvs.uni-stuttgart.de.},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {As a result of the paradigm shift away from rather rigid data warehouses to
general-purpose data lakes, fully flexible self-service analytics is made
possible. However, this also increases the complexity for domain experts who
perform these analyses, since comprehensive data preparation tasks have to be
implemented for each data access. For this reason, we developed BARENTS, a
toolset that enables domain experts to specify data preparation tasks as
ontology rules, which are then applied to the data involved. Although our
evaluation of BARENTS showed that it is a valuable contribution to self-service
analytics, a major drawback is that domain experts do not receive any semantic
support when specifying the rules. In this paper, we therefore address how a
recommender approach can provide additional support to domain experts by
identifying supplementary datasets that might be relevant for their analyses or
additional data processing steps to improve data refinement. This recommender
operates on the set of data preparation rules specified in BARENTS---i.e., the
accumulated knowledge of all domain experts is factored into the data
preparation for each new analysis. Evaluation results indicate that such a
recommender approach further contributes to the practicality of BARENTS and
thus represents a step towards effective and efficient self-service analytics
in data lakes.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-05&engl=1}
}
@article{ART-2023-04,
  author      = {Alejandro Gabriel Villanueva Zacarias and Peter Reimann and Christian Weber and Bernhard Mitschang},
  title       = {{AssistML: An Approach to Manage, Recommend and Reuse ML Solutions}},
  journal     = {International Journal of Data Science and Analytics (JDSA)},
  publisher   = {Springer Nature},
  type        = {Article in Journal},
  month       = {July},
  year        = {2023},
  keywords    = {Meta-learning; Machine learning; AutoML; Metadata; Recommender systems},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {The adoption of machine learning (ML) in organizations is characterized by the
use of multiple ML software components. When building ML systems out of these
software components, citizen data scientists face practical requirements which
go beyond the known challenges of ML, e.g., data engineering or parameter
optimization. They are expected to quickly identify ML system options that
strike a suitable trade-off across multiple performance criteria. These options
also need to be understandable for non-technical users. Addressing these
practical requirements represents a problem for citizen data scientists with
limited ML experience. This calls for a concept to help them identify suitable
ML software combinations. Related work, e.g., AutoML systems, are not
responsive enough or cannot balance different performance criteria. This paper
explains how AssistML, a novel concept to recommend ML solutions, i.e.,
software systems with ML models, can be used as an alternative for predictive
use cases. Our concept collects and preprocesses metadata of existing ML
solutions to quickly identify the ML solutions that can be reused in a new use
case. We implement AssistML and evaluate it with two exemplary use cases.
Results show that AssistML can recommend ML solutions in line with users'
performance preferences in seconds. Compared to AutoML, AssistML offers citizen
data scientists simpler, intuitively explained ML solutions in considerably
less time. Moreover, these solutions perform similarly or even better than
AutoML models.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-04&engl=1}
}
@article{ART-2023-03,
  author      = {Dennis Treder-Tschechlov and Manuel Fritz and Holger Schwarz and Bernhard Mitschang},
  title       = {{ML2DAC: Meta-Learning to Democratize AutoML for Clustering Analysis}},
  journal     = {Proceedings of the ACM on Management of Data (SIGMOD)},
  publisher   = {Association for Computing Machinery (ACM)},
  volume      = {1},
  number      = {2},
  pages       = {1--26},
  type        = {Article in Journal},
  month       = {June},
  year        = {2023},
  doi         = {10.1145/3589289},
  language    = {English},
  cr-category = {I.5.3 Pattern Recognition Clustering},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Analysts often struggle with the combined algorithm selection and
hyperparameter optimization problem, a.k.a. CASH problem in literature.
Typically, they execute several algorithms with varying hyperparameter settings
to find configurations that show valuable results. Efficiently finding these
configurations is a major challenge. In clustering analyses, analysts face the
additional challenge to select a cluster validity index that allows them to
evaluate clustering results in a purely unsupervised fashion. Many different
cluster validity indices exist and each one has its benefits depending on the
dataset characteristics. While experienced analysts might address these
challenges using their domain knowledge and experience, especially novice
analysts struggle with them. In this paper, we propose a new meta-learning
approach to address these challenges. Our approach uses knowledge from past
clustering evaluations to apply strategies that experienced analysts would
exploit. In particular, we use meta-learning to (a) select a suitable
clustering validity index, (b) efficiently select well-performing clustering
algorithm and hyperparameter configurations, and (c) reduce the search space to
suitable clustering algorithms. In the evaluation, we show that our approach
significantly outperforms state-of-the-art approaches regarding accuracy and
runtime.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-03&engl=1}
}
@article{ART-2023-02,
  author      = {Vitali Hirsch and Peter Reimann and Dennis Treder-Tschechlov and Holger Schwarz and Bernhard Mitschang},
  title       = {{Exploiting Domain Knowledge to address Class Imbalance and a Heterogeneous Feature Space in Multi-Class Classification}},
  journal     = {International Journal on Very Large Data Bases (VLDB-Journal)},
  publisher   = {Springer},
  type        = {Article in Journal},
  month       = {February},
  year        = {2023},
  keywords    = {Classification; Domain knowledge; Multi-class Imbalance; Heterogeneous feature space},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Real-world data of multi-class classification tasks often show complex data
characteristics that lead to a reduced classification performance. Major
analytical challenges are a high degree of multi-class imbalance within data
and a heterogeneous feature space, which increases the number and complexity of
class patterns. Existing solutions to classification or data pre-processing
only address one of these two challenges in isolation. We propose a novel
classification approach that explicitly addresses both challenges of
multi-class imbalance and heterogeneous feature space together. As main
contribution, this approach exploits domain knowledge in terms of a taxonomy to
systematically prepare the training data. Based on an experimental evaluation
on both real-world data and several synthetically generated data sets, we show
that our approach outperforms any other classification technique in terms of
accuracy. Furthermore, it entails considerable practical benefits in real-world
use cases, e.g., it reduces rework required in the area of product quality
control.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-02&engl=1}
}
% Journal article: Christoph Stach, Future Internet 15(2), February 2023.
@article{ART-2023-01,
  author      = {Christoph Stach},
  title       = {{Data Is the New Oil--Sort of: A View on Why This Comparison Is Misleading and Its Implications for Modern Data Administration}},
  journal     = {Future Internet},
  publisher   = {MDPI},
  volume      = {15},
  number      = {2},
  pages       = {1--49},
  type        = {Article in Journal},
  month       = {February},
  year        = {2023},
  issn        = {1999-5903},
  doi         = {10.3390/fi15020071},
  ee          = {https://www.mdpi.com/1999-5903/15/2/71/htm},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-01&engl=1},
  language    = {English},
  keywords    = {data characteristics; data administration; data refinement; reliability; security; privacy},
  cr-category = {E.0 Data General,
H.3 Information Storage and Retrieval,
K.6.5 Security and Protection,
K.4.1 Computers and Society Public Policy Issues},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
  abstract    = {Currently, data are often referred to as the oil of the 21st century. This
comparison is not only used to express that the resource data are just as
important for the fourth industrial revolution as oil was for the technological
revolution in the late 19th century. There are also further similarities
between these two valuable resources in terms of their handling. Both must
first be discovered and extracted from their sources. Then, the raw materials
must be cleaned, preprocessed, and stored before they can finally be delivered
to consumers. Despite these undeniable similarities, however, there are
significant differences between oil and data in all of these processing steps,
making data a resource that is considerably more challenging to handle. For
instance, data sources, as well as the data themselves, are heterogeneous,
which means there is no one-size-fits-all data acquisition solution.
Furthermore, data can be distorted by the source or by third parties without
being noticed, which affects both quality and usability. Unlike oil, there is
also no uniform refinement process for data, as data preparation should be
tailored to the subsequent consumers and their intended use cases. With regard
to storage, it has to be taken into account that data are not consumed when
they are processed or delivered to consumers, which means that the data volume
that has to be managed is constantly growing. Finally, data may be subject to
special constraints in terms of distribution, which may entail individual
delivery plans depending on the customer and their intended purposes. Overall,
it can be concluded that innovative approaches are needed for handling the
resource data that address these inherent challenges. In this paper, we
therefore study and discuss the relevant characteristics of data making them
such a challenging resource to handle. In order to enable appropriate data
provisioning, we introduce a holistic research concept from data source to data
sink that respects the processing requirements of data producers as well as the
quality requirements of data consumers and, moreover, ensures a trustworthy
data administration.},
}
% Edited book: Stach and Gritti (eds.), MDPI, September 2023.
@book{BOOK-2023-02,
  editor      = {Christoph Stach and Cl{\'e}mentine Gritti},
  title       = {{Security and Privacy in Blockchains and the IoT II}},
  series      = {Future Internet},
  publisher   = {MDPI},
  address     = {Basel, Beijing, Wuhan, Barcelona, Belgrade, Novi Sad, Cluj, Manchester},
  pages       = {480},
  type        = {Book},
  month       = {September},
  year        = {2023},
  isbn        = {978-3-0365-8772-1},
  doi         = {10.3390/books978-3-0365-8773-8},
  ee          = {https://www.mdpi.com/books/book/7885},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2023-02&engl=1},
  language    = {English},
  keywords    = {authentication; blockchain; demand-driven data provision; digital signatures; distributed ledger technology; encryption; Internet of Things; privacy-aware data processing; secure data management; smart things},
  cr-category = {K.6.5 Security and Protection,
K.4.1 Computers and Society Public Policy Issues},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems;
University of Stuttgart, Institute of Parallel and Distributed Systems},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
  abstract    = {In this age of data-driven transformation, where the fusion of blockchain
technologies and the Internet of Things (IoT) is shaping the fabric of our
digital society, the need for security and privacy has never been more
important. This Special Issue delves into the intricate confluence of these two
disruptive forces and provides a comprehensive overview of security and privacy
aspects in this regard. Focusing on protection goals such as confidentiality,
integrity, availability, and privacy, this compilation encapsulates the essence
of these multi-layered challenges. Ranging from complex data-driven
applications and smart services to novel approaches that enhance security and
privacy in the context of blockchain technologies and the IoT, the research
articles and literature reviews presented here offer a sophisticated mesh of
insights. Innovative solutions are highlighted from a variety of perspectives,
and challenges such as secure data transmission, confidential communication,
and tamper-proof data storage are explored.
In this way, this Special Issue is a beacon for practitioners, researchers, and
technology enthusiasts. Developers seeking to harness the potential of
blockchain technology and IoT find rich insights while users get a
comprehensive overview of the latest research and trends. The symphony of
interdisciplinary knowledge presented here creates a harmonious blend of theory
and practice, highlighting the intricate interdependencies between
technological advances and the need for security and privacy.},
}
% Edited book: Stach (ed.), MDPI, January 2023.
@book{BOOK-2023-01,
  editor      = {Christoph Stach},
  title       = {{Security and Privacy in Blockchains and the IoT}},
  series      = {Future Internet},
  publisher   = {MDPI},
  address     = {Basel, Beijing, Wuhan, Barcelona, Belgrade, Manchester, Tokyo, Cluj, Tianjin},
  pages       = {166},
  type        = {Book},
  month       = {January},
  year        = {2023},
  isbn        = {978-3-0365-6251-3},
  doi         = {10.3390/books978-3-0365-6252-0},
  ee          = {https://www.mdpi.com/books/book/6686},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2023-01&engl=1},
  language    = {English},
  keywords    = {Blockchain; IoT; Confidentiality; Integrity; Authenticity; Access Control; Security; Privacy; Efficient Blockchain Technologies; Trustworthy Smart Services; Privacy-Aware Machine Learning; Data Protection Laws},
  cr-category = {K.6.5 Security and Protection,
K.4.1 Computers and Society Public Policy Issues},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems;
University of Stuttgart, Institute of Parallel and Distributed Systems},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
  abstract    = {In this day and age, data have become an immensely valuable resource. They are
the key driver that puts the smart into smart services. This is fundamentally
fueled by two technological achievements, namely the Internet of Things (IoT),
which enables continuous and comprehensive collection of all kinds of data, and
blockchain technologies, which provide secure data management and exchange. In
addition to those information security measures, however, data privacy
solutions are also required to protect the involved sensitive data. In this
book, eight research papers address security and privacy challenges when
dealing with blockchain technologies and the IoT. Concerning the IoT, solutions
are presented on how IoT group communication can be secured and how trust
within IoT applications can be increased. In the context of blockchain
technologies, approaches are introduced on how query processing capabilities
can be enhanced and how a proof-of-work consensus protocol can be efficiently
applied in IoT environments. Furthermore, it is discussed how blockchain
technologies can be used in IoT environments to control access to confidential
IoT data as well as to enable privacy-aware data sharing. Finally, two reviews
give an overview of the state of the art in in-app activity recognition based
on convolutional neural networks and the prospects for blockchain technology
applications in ambient assisted living.},
}