Institute for Parallel and Distributed Systems (IPVS)

Publications

An overview of publications of the Institute for Parallel and Distributed Systems.

Publications AS: Bibliography 2023 BibTeX

 
% Conference paper in the SummerSOC 2023 revised selected papers (CCIS volume 1847).
% The date range inside booktitle is written with the ASCII en-dash "--" rather than a raw
% Unicode dash, in line with the LaTeX-escaped accents used by other entries of this file.
@inproceedings{INPROC-2023-07,
  author      = {Andrea Fieschi and Yunxuan Li and Pascal Hirmer and Christoph Stach and Bernhard Mitschang},
  title       = {{Privacy in Connected Vehicles: Perspectives of Drivers and Car Manufacturers}},
  booktitle   = {Service-Oriented Computing: 17th Symposium and Summer School, SummerSOC 2023, Heraklion, Crete, Greece, June 25 -- July 1, 2023, Revised Selected Papers},
  editor      = {Marco Aiello and Johanna Barzen and Schahram Dustdar and Frank Leymann},
  address     = {Cham},
  publisher   = {Springer Nature Switzerland},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  series      = {Communications in Computer and Information Science},
  volume      = {1847},
  pages       = {59--68},
  type        = {Conference Paper},
  month       = {October},
  year        = {2023},
  isbn        = {978-3-031-45727-2},
  doi         = {10.1007/978-3-031-45728-9_4},
  keywords    = {Connected Vehicles; Privacy; Anonymization},
  language    = {English},
  cr-category = {K.4.1 Computers and Society Public Policy Issues},
  contact     = {Senden Sie eine E-Mail an Andrea Fieschi (Andrea.Fieschi@ipvs.uni-stuttgart.de) oder Yunxuan Li (Yunxuan.Li@ipvs.uni-stuttgart.de).},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {The digital revolution has led to significant technological advancements in the automotive industry, enabling vehicles to process and share information with other vehicles and the cloud. However, as data sharing becomes more prevalent, privacy protection has become an essential issue. In this paper, we explore various privacy challenges regarding different perspectives of drivers and car manufacturers. We also propose general approaches to overcome these challenges with respect to their individual needs. Finally, we highlight the importance of collaboration between drivers and car manufacturers to establish trust and achieve better privacy protection.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-07&engl=1},
}
% Conference paper in the ICEIS 2023 proceedings (Volume 1), published by SciTePress.
% The abstract reads "almost arbitrarily"; the word had been split into "al most".
@inproceedings{INPROC-2023-06,
  author      = {Jan Schneider and Christoph Gr{\"o}ger and Arnold Lutsch and Holger Schwarz and Bernhard Mitschang},
  title       = {{Assessing the Lakehouse: Analysis, Requirements and Definition}},
  booktitle   = {Proceedings of the 25th International Conference on Enterprise Information Systems, ICEIS 2023, Volume 1, Prague, Czech Republic, April 24-26, 2023},
  editor      = {Joaquim Filipe and Michal Smialek and Alexander Brodsky and Slimane Hammoudi},
  address     = {Prague},
  publisher   = {SciTePress},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  pages       = {44--56},
  type        = {Conference Paper},
  month       = {May},
  year        = {2023},
  isbn        = {978-989-758-648-4},
  issn        = {2184-4992},
  doi         = {10.5220/0011840500003467},
  keywords    = {Lakehouse; Data Warehouse; Data Lake; Data Management; Data Analytics},
  language    = {English},
  cr-category = {H.2.4 Database Management Systems,     H.2.7 Database Administration,     H.2.8 Database Applications},
  ee          = {https://www.scitepress.org/PublicationsDetail.aspx?ID=9ydI3Lyl2Fk=,     https://doi.org/10.5220/0011840500003467},
  contact     = {jan.schneider@ipvs.uni-stuttgart.de},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {The digital transformation opens new opportunities for enterprises to optimize their business processes by applying data-driven analysis techniques. For storing and organizing the required huge amounts of data, different types of data platforms have been employed in the past, with data warehouses and data lakes being the most prominent ones. Since they possess rather contrary characteristics and address different types of analytics, companies typically utilize both of them, leading to complex architectures with replicated data and slow analytical processes. To counter these issues, vendors have recently been making efforts to break the boundaries and to combine features of both worlds into integrated data platforms. Such systems are commonly called lakehouses and promise to simplify enterprise analytics architectures by serving all kinds of analytical workloads from a single platform. However, it remains unclear how lakehouses can be characterized, since existing definitions focus almost arbitrarily on individual architectural or functional aspects and are often driven by marketing. In this paper, we assess prevalent definitions for lakehouses and finally propose a new definition, from which several technical requirements for lakehouses are derived. We apply these requirements to several popular data management tools, such as Delta Lake, Snowflake and Dremio in order to evaluate whether they enable the construction of lakehouses.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-06&engl=1},
}
% Conference paper in Procedia CIRP (13th CIRP Conference on Learning Factories), Elsevier.
% No page range is recorded for this entry.
@inproceedings{INPROC-2023-05,
  author      = {Thomas Ackermann and Robert Miehe and Peter Reimann and Bernhard Mitschang and Ralf Takors and Thomas Bauernhansl},
  title       = {{A Cross-Disciplinary Training Concept for Future Technologists in the Dawn of Biointelligent Production Systems}},
  booktitle   = {Procedia CIRP: Proceedings of 13th CIRP Conference on Learning Factories (CIRP CLF)},
  publisher   = {Elsevier BV},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  type        = {Conference Paper},
  month       = {May},
  year        = {2023},
  keywords    = {Biointelligent systems; Biological transformation; Converging technologies; Qualification},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Biologicalization is considered one of the most important transformation processes in industry alongside digitalization. This work presents a qualification concept within the Stuttgart Biointelligent Manufacturing Framework (BioMEFUS), which is intended to provide skills and experiences at the intersections between manufacturing and process engineering, computer science and life science. Life cycle management, production methods and engineering of components towards the development and implementation of biointelligent systems are considered as the major engineering platforms of the framework. The qualification concept is developed for early stage researchers (ESRs) at the doctorate stage. It provides a mapping of individual research projects in the field of biointelligent production systems and contains subject-related and methodological building blocks for the formation of future experts and decision-makers in the course of biological transformation.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-05&engl=1},
}
% Position paper in the ICEIS 2023 proceedings (SciTePress), pages 614--621.
@inproceedings{INPROC-2023-04,
  author      = {Julius Voggesberger and Peter Reimann and Bernhard Mitschang},
  title       = {{Towards the Automatic Creation of Optimized Classifier Ensembles}},
  booktitle   = {Proceedings of the 25th International Conference on Enterprise Information Systems (ICEIS 2023)},
  publisher   = {SciTePress},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  pages       = {614--621},
  type        = {Conference Paper},
  month       = {April},
  year        = {2023},
  keywords    = {Classifier Ensembles; Classifier Diversity; Decision Fusion; AutoML; Machine Learning},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Classifier ensemble algorithms allow for the creation of combined machine learning models that are more accurate and generalizable than individual classifiers. However, creating such an ensemble is complex, as several requirements must be fulfilled. An expert has to select multiple classifiers that are both accurate and diverse. In addition, a decision fusion algorithm must be selected to combine the predictions of these classifiers into a consensus decision. Satisfying these requirements is challenging even for experts, as it requires a lot of time and knowledge. In this position paper, we propose to automate the creation of classifier ensembles. While there already exist several frameworks that automatically create multiple classifiers, none of them meet all requirements to build optimized ensembles based on these individual classifiers. Hence, we introduce and compare three basic approaches that tackle this challenge. Based on the comparison results, we propose one of the approaches that best meets the requirements to lay the foundation for future work.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-04&engl=1},
}
% Conference paper in the ICDE 2023 proceedings (IEEE). No page range is recorded.
% NOTE(review): the title separator was a hyphen glued to the acronym; it is written here
% as a spaced "--" dash. Confirm against the publisher's record of the title.
@inproceedings{INPROC-2023-03,
  author      = {Yannick Wilhelm and Peter Reimann and Wolfgang Gauchel and Steffen Klein and Bernhard Mitschang},
  title       = {{PUSION -- A Generic and Automated Framework for Decision Fusion}},
  booktitle   = {Proceedings of the 39th IEEE International Conference on Data Engineering (ICDE 2023)},
  publisher   = {IEEE},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  type        = {Conference Paper},
  month       = {April},
  year        = {2023},
  keywords    = {Classifier ensembles; decision fusion; automated decision fusion; hybrid fault diagnosis},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Combining two or more classifiers into an ensemble and fusing the individual classifier decisions to a consensus decision can improve the accuracy for a classification problem. The classification improvement of the fusion result depends on numerous factors, such as the data set, the combination scenario, the decision fusion algorithm, as well as the prediction accuracies and diversity of the multiple classifiers to be combined. Due to these factors, the best decision fusion algorithm for a given decision fusion problem cannot be generally determined in advance. In order to support the user in combining classifiers and to achieve the best possible fusion result, we propose the PUSION (Python Universal fuSION) framework, a novel generic and automated framework for decision fusion of classifiers. The framework includes 14 decision fusion algorithms and covers a total of eight different combination scenarios for both multi-class and multi-label classification problems. The introduced concept of AutoFusion detects the combination scenario for a given use case, automatically selects the applicable decision fusion algorithms and returns the decision fusion algorithm that leads to the best fusion result. The framework is evaluated with two real-world case studies in the field of fault diagnosis. In both case studies, the consensus decision of multiple classifiers and heterogeneous fault diagnosis methods significantly increased the overall classification accuracy. Our evaluation results show that our framework is of practical relevance and reliably finds the best performing decision fusion algorithm for a given combination task.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-03&engl=1},
}
% Conference paper in the BTW proceedings (GI, Lecture Notes in Informatics), pages 329--351.
% The conference tag in booktitle is "BTW 2023" so that it agrees with the year field of
% this entry; it previously read "BTW 2019".
@inproceedings{INPROC-2023-02,
  author      = {Dennis Treder-Tschechlov and Peter Reimann and Holger Schwarz and Bernhard Mitschang},
  title       = {{Approach to Synthetic Data Generation for Imbalanced Multi-class Problems with Heterogeneous Groups}},
  booktitle   = {Tagungsband der 20. Fachtagung Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2023)},
  publisher   = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  series      = {Lecture Notes in Informatics (LNI)},
  pages       = {329--351},
  type        = {Conference Paper},
  month       = {March},
  year        = {2023},
  keywords    = {Machine learning; classification; data generation; real-world data characteristics},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  ee          = {https://dl.gi.de/bitstream/handle/20.500.12116/40320/B3-5.pdf?},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {To benchmark novel classification algorithms, these algorithms should be evaluated on data with characteristics that also appear in real-world use cases. Important data characteristics that often lead to challenges for classification approaches are multi-class imbalance and heterogeneous groups. Heterogeneous groups are sets of real-world entities, where the classification patterns may vary among different groups and where the groups are typically imbalanced in the data. Real-world data that comprise these characteristics are usually not publicly available, e.g., because they constitute sensitive patient information or due to privacy concerns. Further, the manifestations of the characteristics cannot be controlled specifically on real-world data. A more rigorous approach is to synthetically generate data such that different manifestations of the characteristics can be controlled as well. However, existing data generators are not able to generate data that feature both data characteristics, i.e., multi-class imbalance and heterogeneous groups. In this paper, we propose an approach that fills this gap as it allows to synthetically generate data that exhibit both characteristics. We make use of a taxonomy model that organizes real-world entities in domain-specific heterogeneous groups to generate data reflecting the characteristics of these groups. Further, we incorporate probability distributions to reflect the imbalances of multiple classes and groups from real-world use cases. The evaluation shows that our approach can generate data that feature the data characteristics multi-class imbalance and heterogeneous groups and that it allows to control different manifestations of these characteristics.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-02&engl=1},
}
% Workshop paper (IEEE) introducing the CV-Priv privacy context model, pages 1--6.
@inproceedings{INPROC-2023-01,
  author      = {Yunxuan Li and Pascal Hirmer and Christoph Stach},
  title       = {{CV-Priv: Towards a Context Model for Privacy Policy Creation for Connected Vehicles}},
  booktitle   = {Proceedings of the 21st International Conference on Pervasive Computing and Communications Workshops},
  publisher   = {IEEE},
  institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
  pages       = {1--6},
  type        = {Conference Paper},
  month       = {March},
  year        = {2023},
  keywords    = {Context Modeling; Ontology; Privacy Policy; Privacy-Preserving; Connected Vehicle},
  language    = {English},
  cr-category = {K.4.1 Computers and Society Public Policy Issues,     K.6.5 Security and Protection},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Connected vehicles are becoming progressively capable of collecting, processing, and sharing data, which leads to a growing concern about privacy in the automotive domain. However, research has shown that although users are highly concerned about their privacy, they usually find it difficult to configure privacy settings. This is because the privacy context, which represents the privacy circumstance a driver faces during the privacy policy creation, is highly complex. To create custom privacy policies, drivers must consider the privacy context information, such as what service is requesting data from which vehicle sensor, or what privacy countermeasures are available for vehicles and satisfy certain privacy properties. This easily leads to information and choice overhead. Therefore, we propose the novel ontology-based privacy context model, CV-Priv, for the modeling of such privacy context information for creating custom privacy policies in the automotive domain. In this paper, we analyze the design requirements for a privacy context model based on challenges drivers might face during the privacy policy creation phase. We also demonstrate how CV-Priv can be utilized by context-aware systems to help drivers transform their fuzzy privacy requirements into sound privacy policies.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-01&engl=1},
}
% Journal article (Journal of Big Data, volume 10) on the Enterprise Data Marketplace.
% The abstract's possessive "company's" is written with a plain ASCII apostrophe; the
% field previously held a mis-decoded multi-byte quote character at that position.
@article{ART-2023-07,
  author      = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
  title       = {{Introducing the enterprise data marketplace: a platform for democratizing company data}},
  journal     = {Journal of Big Data},
  publisher   = {Springer Nature},
  volume      = {10},
  pages       = {1--38},
  type        = {Article in Journal},
  month       = {November},
  year        = {2023},
  issn        = {2196-1115},
  doi         = {10.1186/s40537-023-00843-z},
  keywords    = {Data Catalog; Data Democratization; Data Market; Data Sharing; Enterprise Data Marketplace; Metadata Management},
  language    = {English},
  cr-category = {E.m Data Miscellaneous,     H.3.7 Digital Libraries,     H.4.m Information Systems Applications Miscellaneous},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {In this big data era, multitudes of data are generated and collected which contain the potential to gain new insights, e.g., for enhancing business models. To leverage this potential through, e.g., data science and analytics projects, the data must be made available. In this context, data marketplaces are used as platforms to facilitate the exchange and thus, the provisioning of data and data-related services. Data marketplaces are mainly studied for the exchange of data between organizations, i.e., as external data marketplaces. Yet, the data collected within a company also has the potential to provide valuable insights for this same company, for instance to optimize business processes. Studies indicate, however, that a significant amount of data within companies remains unused. In this sense, it is proposed to employ an Enterprise Data Marketplace, a platform to democratize data within a company among its employees. Specifics of the Enterprise Data Marketplace, how it can be implemented or how it makes data available throughout a variety of systems like data lakes has not been investigated in literature so far. Therefore, we present the characteristics and requirements of this kind of marketplace. We also distinguish it from other tools like data catalogs, provide a platform architecture and highlight how it integrates with the company's system landscape. The presented concepts are demonstrated through an Enterprise Data Marketplace prototype and an experiment reveals that this marketplace significantly improves the data consumer workflows in terms of efficiency and complexity. This paper is based on several interdisciplinary works combining comprehensive research with practical experience from an industrial perspective. We therefore present the Enterprise Data Marketplace as a distinct marketplace type and provide the basis for establishing it within a company.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-07&engl=1},
}
% Editorial in Future Internet (MDPI), volume 15, number 8. No keywords are recorded.
% The publisher location uses the English country name, in keeping with the entry's
% language field; it previously used the German name.
@article{ART-2023-06,
  author      = {Christoph Stach and Cl{\'e}mentine Gritti},
  title       = {{Editorial to the Special Issue on Security and Privacy in Blockchains and the IoT Volume II}},
  journal     = {Future Internet},
  address     = {Basel, Switzerland},
  publisher   = {MDPI},
  volume      = {15},
  number      = {8},
  pages       = {1--7},
  type        = {Article in Journal},
  month       = {August},
  year        = {2023},
  issn        = {1999-5903},
  doi         = {10.3390/fi15080272},
  language    = {English},
  cr-category = {D.4.6 Operating Systems Security and Protection,     K.4.1 Computers and Society Public Policy Issues,     K.6.5 Security and Protection},
  ee          = {https://www.mdpi.com/1999-5903/15/8/272/htm},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {In this age of data-driven transformation, where the fusion of blockchain technologies and the Internet of Things (IoT) is shaping the fabric of our digital society, the need for security and privacy has never been more important. This Special Issue delves into the intricate confluence of these two disruptive forces and provides a comprehensive overview of security and privacy aspects in this regard. Focusing on protection goals such as confidentiality, integrity, availability, and privacy, this compilation encapsulates the essence of these multi-layered challenges. Ranging from complex data-driven applications and smart services to novel approaches that enhance security and privacy in the context of blockchain technologies and the IoT, the research articles and literature reviews presented here offer a sophisticated mesh of insights. Innovative solutions are highlighted from a variety of perspectives, and challenges such as secure data transmission, confidential communication, and tamper-proof data storage are explored. In this way, this Special Issue is a beacon for practitioners, researchers, and technology enthusiasts. Developers seeking to harness the potential of blockchain technology and IoT find rich insights while users get a comprehensive overview of the latest research and trends. The symphony of interdisciplinary knowledge presented here creates a harmonious blend of theory and practice, highlighting the intricate interdependencies between technological advances and the need for security and privacy.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-06&engl=1},
}
% Journal article in Datenbank-Spektrum, volume 23, number 2, pages 123--132.
% NOTE(review): the abstract's parenthetical after "BARENTS" is set off with a LaTeX
% em-dash ("---"); the field held a bare hyphen there, presumably a flattened dash.
@article{ART-2023-05,
  author      = {Christoph Stach and Rebecca Eichler and Simone Schmidt},
  title       = {{A Recommender Approach to Enable Effective and Efficient Self-Service Analytics in Data Lakes}},
  journal     = {Datenbank-Spektrum},
  publisher   = {Springer Nature},
  volume      = {23},
  number      = {2},
  pages       = {123--132},
  type        = {Article in Journal},
  month       = {June},
  year        = {2023},
  issn        = {1618-2162},
  doi         = {10.1007/s13222-023-00443-4},
  keywords    = {Data Lake; Data Preparation; Data Pre-Processing; Data Refinement; Recommender; Self-Service Analytics},
  language    = {English},
  cr-category = {H.2.7 Database Administration,     E.2 Data Storage Representations,     H.3.3 Information Search and Retrieval,     H.2.8 Database Applications},
  contact     = {Senden Sie eine E-Mail an christoph.stach@ipvs.uni-stuttgart.de.},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {As a result of the paradigm shift away from rather rigid data warehouses to general-purpose data lakes, fully flexible self-service analytics is made possible. However, this also increases the complexity for domain experts who perform these analyses, since comprehensive data preparation tasks have to be implemented for each data access. For this reason, we developed BARENTS, a toolset that enables domain experts to specify data preparation tasks as ontology rules, which are then applied to the data involved. Although our evaluation of BARENTS showed that it is a valuable contribution to self-service analytics, a major drawback is that domain experts do not receive any semantic support when specifying the rules. In this paper, we therefore address how a recommender approach can provide additional support to domain experts by identifying supplementary datasets that might be relevant for their analyses or additional data processing steps to improve data refinement. This recommender operates on the set of data preparation rules specified in BARENTS---i.e., the accumulated knowledge of all domain experts is factored into the data preparation for each new analysis. Evaluation results indicate that such a recommender approach further contributes to the practicality of BARENTS and thus represents a step towards effective and efficient self-service analytics in data lakes.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-05&engl=1},
}
% Journal article (International Journal of Data Science and Analytics) on AssistML.
% No volume, number or page range is recorded for this entry.
% NOTE(review): the first author is given in "Last, First" form so that the two-word
% surname stays together when BibTeX splits the name; confirm the surname with the author.
% The abstract's possessive "users'" is written with a plain ASCII apostrophe; the field
% previously held a mis-decoded multi-byte quote character at that position.
@article{ART-2023-04,
  author      = {Villanueva Zacarias, Alejandro Gabriel and Peter Reimann and Christian Weber and Bernhard Mitschang},
  title       = {{AssistML: An Approach to Manage, Recommend and Reuse ML Solutions}},
  journal     = {International Journal of Data Science and Analytics (JDSA)},
  publisher   = {Springer Nature},
  type        = {Article in Journal},
  month       = {July},
  year        = {2023},
  keywords    = {Meta-learning; Machine learning; AutoML; Metadata; Recommender systems},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {The adoption of machine learning (ML) in organizations is characterized by the use of multiple ML software components. When building ML systems out of these software components, citizen data scientists face practical requirements which go beyond the known challenges of ML, e.g., data engineering or parameter optimization. They are expected to quickly identify ML system options that strike a suitable trade-off across multiple performance criteria. These options also need to be understandable for non-technical users. Addressing these practical requirements represents a problem for citizen data scientists with limited ML experience. This calls for a concept to help them identify suitable ML software combinations. Related work, e.g., AutoML systems, are not responsive enough or cannot balance different performance criteria. This paper explains how AssistML, a novel concept to recommend ML solutions, i.e., software systems with ML models, can be used as an alternative for predictive use cases. Our concept collects and preprocesses metadata of existing ML solutions to quickly identify the ML solutions that can be reused in a new use case. We implement AssistML and evaluate it with two exemplary use cases. Results show that AssistML can recommend ML solutions in line with users' performance preferences in seconds. Compared to AutoML, AssistML offers citizen data scientists simpler, intuitively explained ML solutions in considerably less time. Moreover, these solutions perform similarly or even better than AutoML models.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-04&engl=1},
}
% Journal article in Proceedings of the ACM on Management of Data, volume 1, number 2.
% No keywords are recorded for this entry.
% The language field is English because the title and abstract of this entry are written
% in English; it previously said German.
@article{ART-2023-03,
  author      = {Dennis Treder-Tschechlov and Manuel Fritz and Holger Schwarz and Bernhard Mitschang},
  title       = {{ML2DAC: Meta-Learning to Democratize AutoML for Clustering Analysis}},
  journal     = {Proceedings of the ACM on Management of Data (SIGMOD)},
  publisher   = {Association for Computing Machinery (ACM)},
  volume      = {1},
  number      = {2},
  pages       = {1--26},
  type        = {Article in Journal},
  month       = {June},
  year        = {2023},
  doi         = {10.1145/3589289},
  language    = {English},
  cr-category = {I.5.3 Pattern Recognition Clustering},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Analysts often struggle with the combined algorithm selection and hyperparameter optimization problem, a.k.a. CASH problem in literature. Typically, they execute several algorithms with varying hyperparameter settings to find configurations that show valuable results. Efficiently finding these configurations is a major challenge. In clustering analyses, analysts face the additional challenge to select a cluster validity index that allows them to evaluate clustering results in a purely unsupervised fashion. Many different cluster validity indices exist and each one has its benefits depending on the dataset characteristics. While experienced analysts might address these challenges using their domain knowledge and experience, especially novice analysts struggle with them. In this paper, we propose a new meta-learning approach to address these challenges. Our approach uses knowledge from past clustering evaluations to apply strategies that experienced analysts would exploit. In particular, we use meta-learning to (a) select a suitable clustering validity index, (b) efficiently select well-performing clustering algorithm and hyperparameter configurations, and (c) reduce the search space to suitable clustering algorithms. In the evaluation, we show that our approach significantly outperforms state-of-the-art approaches regarding accuracy and runtime.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-03&engl=1},
}
% Journal article (VLDB Journal, Springer). No volume, number or page range is recorded.
% The title is brace-protected, so its casing is printed as typed: "Address" is capitalised
% to match the Title Case of the remaining words. The abstract's "pre-processing" no longer
% carries a stray space after the hyphen.
@article{ART-2023-02,
  author      = {Vitali Hirsch and Peter Reimann and Dennis Treder-Tschechlov and Holger Schwarz and Bernhard Mitschang},
  title       = {{Exploiting Domain Knowledge to Address Class Imbalance and a Heterogeneous Feature Space in Multi-Class Classification}},
  journal     = {International Journal on Very Large Data Bases (VLDB-Journal)},
  publisher   = {Springer},
  type        = {Article in Journal},
  month       = {February},
  year        = {2023},
  keywords    = {Classification; Domain knowledge; Multi-class Imbalance; Heterogeneous feature space},
  language    = {English},
  cr-category = {H.2.8 Database Applications},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Real-world data of multi-class classification tasks often show complex data characteristics that lead to a reduced classification performance. Major analytical challenges are a high degree of multi-class imbalance within data and a heterogeneous feature space, which increases the number and complexity of class patterns. Existing solutions to classification or data pre-processing only address one of these two challenges in isolation. We propose a novel classification approach that explicitly addresses both challenges of multi-class imbalance and heterogeneous feature space together. As main contribution, this approach exploits domain knowledge in terms of a taxonomy to systematically prepare the training data. Based on an experimental evaluation on both real-world data and several synthetically generated data sets, we show that our approach outperforms any other classification technique in terms of accuracy. Furthermore, it entails considerable practical benefits in real-world use cases, e.g., it reduces rework required in the area of product quality control.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-02&engl=1},
}
% Journal article (Future Internet, volume 15, number 2): why the "data is the new oil" comparison is misleading and what that implies for data administration.
% The abstract is kept on a single line, like every other field value in this file.
@article {ART-2023-01,
   author = {Christoph Stach},
   title = {{Data Is the New Oil--Sort of: A View on Why This Comparison Is Misleading and Its Implications for Modern Data Administration}},
   journal = {Future Internet},
   publisher = {MDPI},
   volume = {15},
   number = {2},
   pages = {1--49},
   type = {Article in Journal},
   month = {February},
   year = {2023},
   issn = {1999-5903},
   doi = {10.3390/fi15020071},
   keywords = {data characteristics; data administration; data refinement; reliability; security; privacy},
   language = {English},
   cr-category = {E.0 Data General,     H.3 Information Storage and Retrieval,     K.6.5 Security and Protection,     K.4.1 Computers and Society Public Policy Issues},
   ee = {https://www.mdpi.com/1999-5903/15/2/71/htm},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Currently, data are often referred to as the oil of the 21st century. This comparison is not only used to express that the resource data are just as important for the fourth industrial revolution as oil was for the technological revolution in the late 19th century. There are also further similarities between these two valuable resources in terms of their handling. Both must first be discovered and extracted from their sources. Then, the raw materials must be cleaned, preprocessed, and stored before they can finally be delivered to consumers. Despite these undeniable similarities, however, there are significant differences between oil and data in all of these processing steps, making data a resource that is considerably more challenging to handle. For instance, data sources, as well as the data themselves, are heterogeneous, which means there is no one-size-fits-all data acquisition solution. Furthermore, data can be distorted by the source or by third parties without being noticed, which affects both quality and usability. Unlike oil, there is also no uniform refinement process for data, as data preparation should be tailored to the subsequent consumers and their intended use cases. With regard to storage, it has to be taken into account that data are not consumed when they are processed or delivered to consumers, which means that the data volume that has to be managed is constantly growing. Finally, data may be subject to special constraints in terms of distribution, which may entail individual delivery plans depending on the customer and their intended purposes. Overall, it can be concluded that innovative approaches are needed for handling the resource data that address these inherent challenges. In this paper, we therefore study and discuss the relevant characteristics of data making them such a challenging resource to handle. In order to enable appropriate data provisioning, we introduce a holistic research concept from data source to data sink that respects the processing requirements of data producers as well as the quality requirements of data consumers and, moreover, ensures a trustworthy data administration.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-01&engl=1}
}
% Edited book (MDPI, series "Future Internet"): Special Issue collection on security and privacy in blockchains and the IoT, part II.
% Field groups, in order: bibliographic core, identifiers and links, classification, institute metadata, abstract.
% NOTE(review): pages presumably holds the total page count of the volume rather than a range -- confirm.
@book {BOOK-2023-02,
   editor      = {Christoph Stach and Cl{\'e}mentine Gritti},
   title       = {{Security and Privacy in Blockchains and the IoT II}},
   series      = {Future Internet},
   publisher   = {MDPI},
   address     = {Basel, Beijing, Wuhan, Barcelona, Belgrade, Novi Sad, Cluj, Manchester},
   year        = {2023},
   month       = {September},
   pages       = {480},
   type        = {Book},
   language    = {English},
   isbn        = {978-3-0365-8772-1},
   doi         = {10.3390/books978-3-0365-8773-8},
   ee          = {https://www.mdpi.com/books/book/7885},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2023-02&engl=1},
   keywords    = {authentication; blockchain; demand-driven data provision; digital signatures; distributed ledger technology; encryption; Internet of Things; privacy-aware data processing; secure data management; smart things},
   cr-category = {K.6.5 Security and Protection,     K.4.1 Computers and Society Public Policy Issues},
   department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems;     University of Stuttgart, Institute of Parallel and Distributed Systems},
   contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   abstract    = {In this age of data-driven transformation, where the fusion of blockchain technologies and the Internet of Things (IoT) is shaping the fabric of our digital society, the need for security and privacy has never been more important. This Special Issue delves into the intricate confluence of these two disruptive forces and provides a comprehensive overview of security and privacy aspects in this regard. Focusing on protection goals such as confidentiality, integrity, availability, and privacy, this compilation encapsulates the essence of these multi-layered challenges. Ranging from complex data-driven applications and smart services to novel approaches that enhance security and privacy in the context of blockchain technologies and the IoT, the research articles and literature reviews presented here offer a sophisticated mesh of insights. Innovative solutions are highlighted from a variety of perspectives, and challenges such as secure data transmission, confidential communication, and tamper-proof data storage are explored. In this way, this Special Issue is a beacon for practitioners, researchers, and technology enthusiasts. Developers seeking to harness the potential of blockchain technology and IoT find rich insights while users get a comprehensive overview of the latest research and trends. The symphony of interdisciplinary knowledge presented here creates a harmonious blend of theory and practice, highlighting the intricate interdependencies between technological advances and the need for security and privacy.}
}
% Edited book (MDPI, series "Future Internet"): collected papers on security and privacy challenges in blockchain technologies and the IoT.
% Field groups, in order: bibliographic core, identifiers and links, classification, institute metadata, abstract.
% NOTE(review): pages presumably holds the total page count of the volume rather than a range -- confirm.
@book {BOOK-2023-01,
   editor      = {Christoph Stach},
   title       = {{Security and Privacy in Blockchains and the IoT}},
   series      = {Future Internet},
   publisher   = {MDPI},
   address     = {Basel, Beijing, Wuhan, Barcelona, Belgrade, Manchester, Tokyo, Cluj, Tianjin},
   year        = {2023},
   month       = {January},
   pages       = {166},
   type        = {Book},
   language    = {English},
   isbn        = {978-3-0365-6251-3},
   doi         = {10.3390/books978-3-0365-6252-0},
   ee          = {https://www.mdpi.com/books/book/6686},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2023-01&engl=1},
   keywords    = {Blockchain; IoT; Confidentiality; Integrity; Authenticity; Access Control; Security; Privacy; Efficient Blockchain Technologies; Trustworthy Smart Services; Privacy-Aware Machine Learning; Data Protection Laws},
   cr-category = {K.6.5 Security and Protection,     K.4.1 Computers and Society Public Policy Issues},
   department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems;     University of Stuttgart, Institute of Parallel and Distributed Systems},
   contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   abstract    = {In this day and age, data have become an immensely valuable resource. They are the key driver that puts the smart into smart services. This is fundamentally fueled by two technological achievements, namely the Internet of Things (IoT), which enables continuous and comprehensive collection of all kinds of data, and blockchain technologies, which provide secure data management and exchange. In addition to those information security measures, however, data privacy solutions are also required to protect the involved sensitive data. In this book, eight research papers address security and privacy challenges when dealing with blockchain technologies and the IoT. Concerning the IoT, solutions are presented on how IoT group communication can be secured and how trust within IoT applications can be increased. In the context of blockchain technologies, approaches are introduced on how query processing capabilities can be enhanced and how a proof-of-work consensus protocol can be efficiently applied in IoT environments. Furthermore, it is discussed how blockchain technologies can be used in IoT environments to control access to confidential IoT data as well as to enable privacy-aware data sharing. Finally, two reviews give an overview of the state of the art in in-app activity recognition based on convolutional neural networks and the prospects for blockchain technology applications in ambient assisted living.}
}
 
To the top of the page