Institute for Parallel and Distributed Systems (IPVS)

Publications

An overview of publications of the Institute for Parallel and Distributed Systems.

Publications AS: Bibliography 2025 BibTeX

 
% Conference paper: Auto-CEn, AutoML for classifier ensembles (DSAA 2025).
% The editor is a corporate name, hence the extra pair of braces.
% Only the words that must keep their case are braced in the title.
% language, cr-category and department are extra fields; styles that do not
% know a field ignore it.
@inproceedings{INPROC-2025-06,
   author      = {Voggesberger, Julius and Reimann, Peter and Treder-Tschechlov, Dennis and Mitschang, Bernhard},
   title       = {{Auto-CEn}: {AutoML} for Classifier Ensembles - Diversity-Based Classifier Selection and Decision Fusion Optimization},
   booktitle   = {2025 IEEE 12th International Conference on Data Science and Advanced Analytics (DSAA)},
   editor      = {{IEEE}},
   publisher   = {IEEE},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages       = {1--10},
   type        = {Conference Paper},
   month       = oct,
   year        = {2025},
   isbn        = {979-8-3315-1179-1},
   language    = {English},
   cr-category = {I.2 Artificial Intelligence},
   department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract    = {Classifier ensembles are mainly used for classification problems exhibiting complex data characteristics, e.g., a high class imbalance. They consist of a set of classifiers and a decision fusion method that combines the predictions of the classifiers. However, creating an accurate ensemble is a challenging optimization problem, since the individual classifiers and the decision fusion method must be selected from a huge search space that consists of numerous classification and fusion algorithms. Thereby, the selected classifiers have to be both accurate and diverse, so that they complement each other and make correct predictions on different data subsets. In addition, the selected decision fusion method has to accurately combine the predictions of individual classifiers into a consensus decision. Current literature focuses on AutoML approaches that reduce the complexity of the optimization problem by omitting the optimization of the classifier diversity and of the decision fusion. In this paper, we propose Auto-CEn as a novel ensemble approach based on AutoML that efficiently solves the optimization problem by selecting a set of accurate and diverse classifiers for the ensemble and by optimizing the decision fusion. In our extensive evaluation on 20 real-world datasets, we show that Auto-CEn outperforms several state-of-the-art baselines and that this improvement is mainly attributed to its novel contributions of diversity-based classifier selection and decision fusion optimization.},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2025-06&engl=1}
}
% Conference paper: data mesh state, challenges and research gaps (SummerSOC 2025, CCIS 2602).
% doi holds the bare identifier (no resolver prefix); isbn is the DOI suffix.
% NOTE(review): the DOI given in the original record has no chapter suffix, so
% it appears to identify the whole volume - confirm the chapter DOI.
% The publisher's city lives in address, not in publisher.
@inproceedings{INPROC-2025-05,
   author      = {Schuiki, Laura and Giebler, Corinna and Hoos, Eva and Schwarz, Holger},
   title       = {Unraveling Data Mesh: Current State, Challenges and Research Gaps},
   booktitle   = {Service-Oriented Computing: 19th Symposium and Summer School, SummerSOC 2025, Crete, Greece, June 16--21, 2025, Revised Selected Papers},
   publisher   = {Springer},
   address     = {Cham},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series      = {Communications in Computer and Information Science},
   volume      = {2602},
   pages       = {59--79},
   type        = {Conference Paper},
   month       = oct,
   year        = {2025},
   isbn        = {978-3-032-07313-6},
   doi         = {10.1007/978-3-032-07313-6},
   language    = {English},
   cr-category = {H.2.1 Database Management Logical Design, H.2.4 Database Management Systems},
   department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract    = {Despite the emergence and rapid growth in popularity of data mesh in recent years, it is still a relatively new concept and there is still a lot of research to be done before it is fully explored. In particular it is unclear how a complete data mesh can be successfully implemented. As a first step, it is necessary to identify open implementation challenges in order to identify research gaps that will point researchers in the right direction. In particular, it is important to consider both literature and practical applications as sources of knowledge. To this end, we conducted a literature review and interviews with industry experts from a globally operating industrial company. Thereby we focused on the technical challenges associated with implementing a data mesh in a company with a well-established data infrastructure. In addition, we derived research gaps from these challenges that need to be addressed in future work.},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2025-05&engl=1}
}
% Conference paper: classification of data products (DEXA 2025, Part I).
% doi holds the chapter DOI that the original record stored in the isbn field.
% NOTE(review): isbn is taken from the DOI suffix (its ISBN-13 check digit is
% valid) - confirm against the volume's front matter.
@inproceedings{INPROC-2025-04,
   author      = {Schuiki, Laura and Schreier, Ulf and Schwarz, Holger and Mitschang, Bernhard},
   title       = {A Data Product Classification by Technical and Machine Learning Aspects},
   booktitle   = {Database and Expert Systems Applications: 36th International Conference, DEXA 2025, Bangkok, Thailand, August 25--27, 2025, Proceedings, Part I},
   publisher   = {Springer Berlin Heidelberg},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages       = {338--344},
   type        = {Conference Paper},
   month       = sep,
   year        = {2025},
   isbn        = {978-3-032-02049-9},
   doi         = {10.1007/978-3-032-02049-9_27},
   language    = {English},
   cr-category = {H.2.1 Database Management Logical Design, H.2.4 Database Management Systems},
   department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract    = {Similar to the software services methodology, data products (DP) can be seen as a kind of data services that specify all important issues for data provisioning and data consumption. DPs come in many different varieties stretching from simple data pipelines to complex machine learning models and model inferences and, above all, typically result in complex data networks. It is time to come up with a useful categorization and structuring of the DP topic in order to conquer complexity. In this paper, we present and assess a basic classification approach that focuses on DP characteristics and thus provides the basis for blueprinting and architectural discussions.},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2025-04&engl=1}
}
% Demonstration paper: privacy toolbox for choosing anonymization techniques (BTW 2025, LNI P361).
% Names are in "Last, First" form; umlauts use BibTeX special characters such as {\"u}.
% NOTE(review): editor "Heinrich, Andreas" may be a misspelling of "Henrich" -
% confirm against the proceedings front matter.
@inproceedings{INPROC-2025-03,
   author      = {Fieschi, Andrea and Hirmer, Pascal and Stach, Christoph},
   title       = {Discovering Suitable Anonymization Techniques: A Privacy Toolbox for Data Experts},
   booktitle   = {Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2025)},
   editor      = {Klettke, Meike and Schenkel, Ralf and Heinrich, Andreas and Nicklas, Daniela and Sch{\"u}le, Maximilian E. and Meyer-Wegener, Klaus},
   address     = {Bonn},
   publisher   = {Gesellschaft f{\"u}r Informatik},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series      = {Lecture Notes in Informatics},
   volume      = {P361},
   pages       = {827--833},
   type        = {Demonstration},
   month       = mar,
   year        = {2025},
   issn        = {2944-7682},
   doi         = {10.18420/BTW2025-48},
   keywords    = {Anonymization; Privacy-Enhancing Techniques; Anonymization by Design},
   language    = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues},
   department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract    = {Identifying the appropriate anonymization technique is a critical yet challenging task for developers, data scientists, and security practitioners. Our interactive toolbox addresses this challenge by providing a comprehensive overview of available anonymization techniques to assist privacy-conscious developers in selecting the right one for their specific use cases. The toolbox offers a hierarchical and classified overview of techniques, each detailed with meta-model information. It employs a modular approach, allowing techniques to be implemented and deployed independently. Additionally, it enables developers to evaluate these techniques on test datasets. Our toolbox allows for the easy addition of new categories and modules. This paper demonstrates the anonymization toolbox's capabilities, simplifying the decision-making process in the Anonymization by Design cycle by ensuring overview, modularity, and flexibility.},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2025-03&engl=1}
}
% Conference paper: literature-based categorisation of anonymization techniques (ICISSP 2025, Volume 1).
% The editor surname "Di Pietro" is written in "Last, First" form so that
% BibTeX does not treat "Di" as part of the given name.
% contact is an extra field; styles that do not know a field ignore it.
@inproceedings{INPROC-2025-02,
   author      = {Fieschi, Andrea and Hirmer, Pascal and Stach, Christoph and Mitschang, Bernhard},
   title       = {Characterising and Categorising Anonymization Techniques: A Literature-Based Approach},
   booktitle   = {Proceedings of the 11th International Conference on Information Systems Security and Privacy - Volume 1 (ICISSP 2025)},
   editor      = {Di Pietro, Roberto and Renaud, Karen and Mori, Paolo},
   publisher   = {SciTePress},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages       = {107--118},
   type        = {Conference Paper},
   month       = feb,
   year        = {2025},
   isbn        = {978-989-758-735-1},
   issn        = {2184-4356},
   doi         = {10.5220/0013379100003899},
   keywords    = {Privacy Protection; PRISMA Systematic Literature Research; Privacy-Enhancing Techniques; Anonymization Techniques},
   language    = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues},
   contact     = {Senden Sie eine E-Mail an andrea.fieschi@ipvs.uni-stuttgart.de.},
   department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract    = {Anonymization plays a crucial role in protecting personal data and ensuring information security. However, selecting the appropriate anonymization technique is a challenging task for developers, data scientists, and security practitioners due to the vast array of techniques available in both research and practice. This paper aims to assist users by offering a method for structuring a framework that helps them make informed decisions about the most appropriate anonymization techniques for their specific use cases. To achieve this, we first conduct a systematic literature review following the PRISMA guidelines to capture the current state of the art in anonymization techniques. Based on the findings from this review, we propose a conceptual organisation of anonymization techniques, designed to help users navigate the complex landscape of anonymization and choose techniques that align with their security requirements.},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2025-02&engl=1}
}
% Conference paper: PROTON, privacy-by-design for data products (ICISSP 2025, Volume 1).
% The acronym PROTON is braced so case-changing styles keep it upper-case.
% The editor surname "Di Pietro" is written in "Last, First" form so that
% BibTeX does not treat "Di" as part of the given name.
% contact is an extra field; styles that do not know a field ignore it.
@inproceedings{INPROC-2025-01,
   author      = {Schuiki, Laura and Stach, Christoph and Giebler, Corinna and Hoos, Eva and Mitschang, Bernhard},
   title       = {Enabling Trusted Data Sharing in Data Spaces: {PROTON} - A Privacy-by-Design Approach to Data Products},
   booktitle   = {Proceedings of the 11th International Conference on Information Systems Security and Privacy - Volume 1 (ICISSP 2025)},
   editor      = {Di Pietro, Roberto and Renaud, Karen and Mori, Paolo},
   publisher   = {SciTePress},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages       = {95--106},
   type        = {Conference Paper},
   month       = feb,
   year        = {2025},
   isbn        = {978-989-758-735-1},
   issn        = {2184-4356},
   doi         = {10.5220/0013372900003899},
   keywords    = {Distributed Data Management; Data Product; Privacy},
   language    = {English},
   cr-category = {E.1 Data Structures, K.4.1 Computers and Society Public Policy Issues},
   contact     = {Senden Sie eine E-Mail an laura-sophie.schuiki@ipvs.uni-stuttgart.de.},
   department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract    = {In the current era of data-driven innovation, the value of data can be significantly enhanced by facilitating its dissemination. In this context, the data mesh concept has gained popularity in recent years. Data Mesh includes domain experts who design so-called data products. It is imperative that all parties involved have trust in these data products. This applies in particular to data subjects who share their data, data owners who create the data products, and data consumers who use them. To establish such trust, privacy approaches are key. Due to the decentralized and distributed nature of data mesh, however, traditional privacy strategies cannot be applied. To address this issue, we present PROTON, a concept that facilitates the handling of PRivacy-cOmpliant daTa prOducts by desigN. PROTON is based on three pillars: a comprehensive description model for privacy requirements, an extended creation process that adheres to these requirements when compiling data products, and a refined access process for verifying compliance prior to data sharing. The practical applicability of PROTON is illustrated by means of a real-world application scenario that has been devised in collaboration with domain experts from our industry partner.},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2025-01&engl=1}
}
% Journal article: Kanban-based management of ML projects in manufacturing (Procedia CIRP 134).
% journal holds only the journal name; the conference whose proceedings make
% up this volume is recorded in note.
% Keywords are separated by semicolons throughout.
@article{ART-2025-01,
   author      = {Schreier, Ulf and Reimann, Peter and Mitschang, Bernhard},
   title       = {A {Kanban}-based Approach to Manage Machine Learning Projects in Manufacturing},
   journal     = {Procedia CIRP},
   publisher   = {Elsevier},
   volume      = {134},
   pages       = {109--114},
   type        = {Article in Journal},
   month       = apr,
   year        = {2025},
   doi         = {10.1016/j.procir.2025.03.011},
   note        = {Proceedings of the 58th CIRP Conference on Manufacturing Systems (CIRP CMS)},
   keywords    = {Machine learning (ML); ML project management; machine learning operations (MLOps); Kanban; Scrum},
   language    = {English},
   cr-category = {H.2.8 Database Applications},
   department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract    = {A growing number of machine learning (ML) projects in manufacturing require the collaboration of various experts. In addition to data scientists, stakeholders with production engineering knowledge have to specify and prioritize individual project tasks. Data engineers prepare input data, while machine learning operations (MLOps) engineers ensure that trained models are deployed and monitored within IT landscapes. Existing project management approaches, e.g., Scrum, have problems for ML projects, as they do not consider various expert roles or ML project stages. We propose a project management approach defining a Kanban workflow by readjusting stages of ML development lifecycles, e.g., CRISP DM. This makes it possible to map expert roles to stages of the Kanban workflow. An adapted Kanban board allows visualizing and reviewing the status of all project tasks. We validate our approach with specific use cases, showing that it facilitates ML project management in manufacturing.},
   url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2025-01&engl=1}
}
 
To the top of the page