@inproceedings {INPROC-2023-05,
   author = {Thomas Ackermann and Robert Miehe and Peter Reimann and Bernhard Mitschang and Ralf Takors and Thomas Bauernhansl},
   title = {{A Cross-Disciplinary Training Concept for Future Technologists in the Dawn of Biointelligent Production Systems}},
   booktitle = {Procedia CIRP: Proceedings of the 13th CIRP Conference on Learning Factories (CIRP CLF)},
   publisher = {Elsevier BV},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2023},
   keywords = {Biointelligent systems; Biological transformation; Converging technologies; Qualification},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Biologicalization is considered one of the most important transformation processes in industry alongside digitalization. This work presents a qualification concept within the Stuttgart Biointelligent Manufacturing Framework (BioMEFUS), which is intended to provide skills and experiences at the intersections between manufacturing and process engineering, computer science and life science. Life cycle management, production methods and engineering of components towards the development and implementation of biointelligent systems are considered as the major engineering platforms of the framework. The qualification concept is developed for early stage researchers (ESRs) at the doctorate stage. It provides a mapping of individual research projects in the field of biointelligent production systems and contains subject-related and methodological building blocks for the formation of future experts and decision-makers in the course of biological transformation.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-05&engl=0}
}
@inproceedings{INPROC-2023-04,
   author = {Julius Voggesberger and Peter Reimann and Bernhard Mitschang},
   title = {{Towards the Automatic Creation of Optimized Classifier Ensembles}},
   booktitle = {Proceedings of the 25th International Conference on Enterprise Information Systems (ICEIS 2023)},
   pages = {614--621},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2023},
   language = {Englisch},
   keywords = {Classifier Ensembles; Classifier Diversity; Decision Fusion; AutoML; Machine Learning},
   cr-category = {H.2.8 Database Applications},
   abstract = {Classifier ensemble algorithms allow for the creation of combined machine learning models that are more accurate and generalizable than individual classifiers. However, creating such an ensemble is complex, as several requirements must be fulfilled. An expert has to select multiple classifiers that are both accurate and diverse. In addition, a decision fusion algorithm must be selected to combine the predictions of these classifiers into a consensus decision. Satisfying these requirements is challenging even for experts, as it requires a lot of time and knowledge. In this position paper, we propose to automate the creation of classifier ensembles. While there already exist several frameworks that automatically create multiple classifiers, none of them meet all requirements to build optimized ensembles based on these individual classifiers. Hence, we introduce and compare three basic approaches that tackle this challenge. Based on the comparison results, we propose one of the approaches that best meets the requirements to lay the foundation for future work.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-04&engl=0}
}
@inproceedings {INPROC-2023-03,
   author = {Yannick Wilhelm and Peter Reimann and Wolfgang Gauchel and Steffen Klein and Bernhard Mitschang},
   title = {{PUSION - A Generic and Automated Framework for Decision Fusion}},
   booktitle = {Proceedings of the 39th IEEE International Conference on Data Engineering (ICDE 2023)},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2023},
   keywords = {Classifier ensembles; decision fusion; automated decision fusion; hybrid fault diagnosis},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Combining two or more classifiers into an ensemble and fusing the individual classifier decisions to a consensus decision can improve the accuracy for a classification problem. The classification improvement of the fusion result depends on numerous factors, such as the data set, the combination scenario, the decision fusion algorithm, as well as the prediction accuracies and diversity of the multiple classifiers to be combined. Due to these factors, the best decision fusion algorithm for a given decision fusion problem cannot be generally determined in advance. In order to support the user in combining classifiers and to achieve the best possible fusion result, we propose the PUSION (Python Universal fuSION) framework, a novel generic and automated framework for decision fusion of classifiers. The framework includes 14 decision fusion algorithms and covers a total of eight different combination scenarios for both multi-class and multi-label classification problems. The introduced concept of AutoFusion detects the combination scenario for a given use case, automatically selects the applicable decision fusion algorithms and returns the decision fusion algorithm that leads to the best fusion result. The framework is evaluated with two real-world case studies in the field of fault diagnosis. In both case studies, the consensus decision of multiple classifiers and heterogeneous fault diagnosis methods significantly increased the overall classification accuracy. Our evaluation results show that our framework is of practical relevance and reliably finds the best performing decision fusion algorithm for a given combination task.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-03&engl=0}
}
@inproceedings {INPROC-2023-02,
   author = {Dennis Treder-Tschechlov and Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{Approach to Synthetic Data Generation for Imbalanced Multi-class Problems with Heterogeneous Groups}},
   booktitle = {Tagungsband der 20. Fachtagung Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2023)},
   publisher = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {329--351},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2023},
   keywords = {Machine learning; classification; data generation; real-world data characteristics},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   ee = {https://dl.gi.de/bitstream/handle/20.500.12116/40320/B3-5.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {To benchmark novel classification algorithms, these algorithms should be evaluated on data with characteristics that also appear in real-world use cases. Important data characteristics that often lead to challenges for classification approaches are multi-class imbalance and heterogeneous groups. Heterogeneous groups are sets of real-world entities, where the classification patterns may vary among different groups and where the groups are typically imbalanced in the data. Real-world data that comprise these characteristics are usually not publicly available, e.g., because they constitute sensitive patient information or due to privacy concerns. Further, the manifestations of the characteristics cannot be controlled specifically on real-world data. A more rigorous approach is to synthetically generate data such that different manifestations of the characteristics can be controlled as well. However, existing data generators are not able to generate data that feature both data characteristics, i.e., multi-class imbalance and heterogeneous groups. In this paper, we propose an approach that fills this gap as it allows to synthetically generate data that exhibit both characteristics. We make use of a taxonomy model that organizes real-world entities in domain-specific heterogeneous groups to generate data reflecting the characteristics of these groups. Further, we incorporate probability distributions to reflect the imbalances of multiple classes and groups from real-world use cases. The evaluation shows that our approach can generate data that feature the data characteristics multi-class imbalance and heterogeneous groups and that it allows to control different manifestations of these characteristics.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-02&engl=0}
}
@inproceedings{INPROC-2022-06,
   author = {Julian Ziegler and Peter Reimann and Christoph Schulz and Florian Keller and Bernhard Mitschang},
   title = {{A Graph Structure to Discover Patterns in Unstructured Processes of Product Development}},
   booktitle = {Proceedings of the 23rd International Conference on Information Reuse and Integration for Data Science (IRI 2022)},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2022},
   language = {Englisch},
   keywords = {Process Discovery; Unstructured Processes; Process Patterns; Graph Data; Frequent Subgraph Mining},
   cr-category = {H.2.8 Database Applications},
   abstract = {A well-known information reuse approach is to exploit event logs for process discovery and optimization. However, process discovery is rarely used for product development projects. This is because information systems in product development, e. g., Product-Lifecycle-Management (PLM) systems, do not provide the event logs required by process discovery algorithms. Additionally, existing algorithms struggle with development projects, as these are unstructured and rich in variety. In this paper, we propose a novel approach to process discovery in order to make it applicable and tailored to product development projects. Instead of using flat event logs, we provide a graph-based data structure that is able to represent both activities and data of product development projects with the dataflow between activities. Based on this structure, we can leverage provenance available in PLM systems. Furthermore, we may use frequent subgraph mining to discover process patterns. Such patterns are well suited to describe different variants and common sub-processes of unstructured processes. Using a prototype, we evaluate this approach and successfully discover prevailing patterns. These patterns may be used by engineers to support their decision-making or help improve the execution of development projects.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-06&engl=0}
}
@inproceedings {INPROC-2022-03,
   author = {Marco Spie{\ss} and Peter Reimann and Christian Weber and Bernhard Mitschang},
   title = {{Analysis of Incremental Learning and Windowing to handle Combined Dataset Shifts on Binary Classification for Product Failure Prediction}},
   booktitle = {Proceedings of the 24th International Conference on Enterprise Information Systems (ICEIS 2022)},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2022},
   keywords = {Binary Classification; Dataset Shift; Incremental Learning; Product Failure Prediction; Windowing},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Dataset Shifts (DSS) are known to cause poor predictive performance in supervised machine learning tasks. We present a challenging binary classification task for a real-world use case of product failure prediction. The target is to predict whether a product, e. g., a truck may fail during the warranty period. However, building a satisfactory classifier is difficult, because the characteristics of underlying training data entail two kinds of DSS. First, the distribution of product configurations may change over time, leading to a covariate shift. Second, products gradually fail at different points in time, so that the labels in training data may change, which may imply a concept shift. Further, both DSS show a trade-off relationship, i. e., addressing one of them may imply negative impacts on the other one. We discuss the results of an experimental study to investigate how different approaches to addressing DSS perform when they are faced with both a covariate and a concept shift. Thereby, we prove that existing approaches, e. g., incremental learning and windowing, especially suffer from the trade-off between both DSS. Nevertheless, we come up with a solution for a data-driven classifier that yields better results than a baseline solution that does not address DSS.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-03&engl=0}
}
@inproceedings{INPROC-2022-02,
   author = {Florian Hermann and Bowen Chen and Golsa Ghasemi and Valentin Stegmaier and Thomas Ackermann and Peter Reimann and Sabrina Vogt and Thomas Graf and Michael Weyrich},
   title = {{A Digital Twin Approach for the Prediction of the Geometry of Single Tracks Produced by Laser Metal Deposition}},
   booktitle = {Procedia CIRP: Proceedings of the 55th CIRP Conference on Manufacturing Systems (CIRP CMS 2022)},
   publisher = {Elsevier BV},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   type = {Konferenz-Beitrag},
   month = {Juni},
   year = {2022},
   language = {Englisch},
   keywords = {Laser metal deposition; Software-defined manufacturing; Digital Twin; Asset Administration Shell},
   cr-category = {H.2.8 Database Applications},
   abstract = {Flexible manufacturing processes such as laser metal deposition have a high potential for a production solely defined by software to cope with the current challenges of production systems. The determination of suitable machine parameters for the production of novel materials and geometries however requires extensive experimental effort. Existing simulative approaches do not offer sufficient accuracy to predict the relevant machine parameters in a satisfactory way. This paper presents a new concept, in which we apply a digital twin to provide a step towards a fully software-defined and predictable laser metal deposition process. The presented concept includes relevant data of the machines as well as data-driven machine learning models and physics-based simulation models. This enables a more reliable prediction of geometries of single tracks which was validated on a laser metal deposition machine.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-02&engl=0}
}
@inproceedings {INPROC-2021-10,
   author = {Alejandro Villanueva Zacarias and Christian Weber and Peter Reimann and Bernhard Mitschang},
   title = {{AssistML: A Concept to Recommend ML Solutions for Predictive Use Cases}},
   booktitle = {Proceedings of the 8th IEEE International Conference on Data Science and Advanced Analytics (DSAA 2021)},
   address = {Porto, Portugal},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2021},
   keywords = {Recommender Systems; Machine Learning; Meta Learning},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The adoption of machine learning (ML) in organizations is characterized by the use of multiple ML software components. Citizen data scientists face practical requirements when building ML systems, which go beyond the known challenges of ML, e. g., data engineering or parameter optimization. They are expected to quickly identify ML system options that strike a suitable trade-off across multiple performance criteria. These options also need to be understandable for non-technical users. Addressing these practical requirements represents a problem for citizen data scientists with limited ML experience. This calls for a method to help them identify suitable ML software combinations. Related work, e. g., AutoML systems, are not responsive enough or cannot balance different performance criteria. In this paper, we introduce AssistML, a novel concept to recommend ML solutions, i. e., software systems with ML models, for predictive use cases. AssistML uses metadata of existing ML solutions to quickly identify and explain options for a new use case. We implement the approach and evaluate it with two exemplary use cases. Results show that AssistML proposes ML solutions that are in line with users' performance preferences in seconds.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-10&engl=0}
}
@inproceedings {INPROC-2021-09,
   author = {Eduard Wagner and Bernd Keller and Peter Reimann and Christoph Gr{\"o}ger and Dieter Spath},
   title = {{Advanced Analytics for Evaluating Critical Joining Technologies in Automotive Body Structures and Body Shops}},
   booktitle = {Proceedings of the 15th CIRP Conference on Intelligent Computation in Manufacturing Engineering (CIRP ICME)},
   publisher = {Elsevier},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2021},
   keywords = {Body Shop; Data Analytics; Data Mining; Advanced Analytics; Machine Learning},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The product development process within the automotive industry is subject to changing demands due to internal and external influences. These influences and adjustments especially affect the car body and its inherent joining technology, as critical stages of variant creation. However, current literature does not offer a suitable analytical method to identify and assess these critical influences. We propose an advanced analytics approach that combines data mining and machine learning techniques within the car body substructure. The evaluation within the Mercedes-Benz AG shows that our approach facilitates a quantitative assessment of unknown interdependencies between car body modules and corresponding joining technique.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-09&engl=0}
}
@inproceedings {INPROC-2021-08,
   author = {Alexander Birk and Yannick Wilhelm and Simon Dreher and Christian Flack and Peter Reimann and Christoph Gr{\"o}ger},
   title = {{A Real-World Application of Process Mining for Data-Driven Analysis of Multi-Level Interlinked Manufacturing Processes}},
   booktitle = {Procedia CIRP: Proceedings of the 54th CIRP Conference on Manufacturing Systems (CIRP CMS 2021)},
   publisher = {Elsevier},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2021},
   keywords = {Process Mining; Multi-level Interlinked Manufacturing Process; Heterogeneous Data Sources; Data Integration},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Process Mining (PM) has huge potential for manufacturing process analysis. However, there is little research on practical applications. We investigate a real-world manufacturing process of pneumatic valves. The manufacturing process comprises interlinked events at the superordinate business process level and at the subordinate machine level, making its analysis based on PM challenging. We show how to integrate heterogeneous data sources and give examples how PM enables a deeper understanding of the manufacturing process, thereby helping to uncover optimization potentials. Furthermore, we discuss challenges in data integration and point out limitations of current PM techniques in manufacturing.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-08&engl=0}
}
@inproceedings{INPROC-2021-07,
   author = {Julian Ziegler and Peter Reimann and Florian Keller and Bernhard Mitschang},
   title = {{A Metadata Model to Connect Isolated Data Silos and Activities of the CAE Domain}},
   booktitle = {Proceedings of the 33rd International Conference on Advanced Information Systems Engineering (CAiSE)},
   pages = {213--228},
   publisher = {Springer International Publishing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   type = {Konferenz-Beitrag},
   month = {Juni},
   year = {2021},
   language = {Englisch},
   keywords = {Metadata Models; Graphs; Computer-aided Engineering},
   cr-category = {H.2.8 Database Applications},
   abstract = {Computer-aided engineering (CAE) applications support the digital transformation of the manufacturing industry. They facilitate virtual product development and product testing via computer simulations. CAE applications generate vast quantities of heterogeneous data. Domain experts struggle to access and analyze them, because such engineering data are not sufficiently described with metadata. In this paper, we characterize the CAE domain and identify unsolved challenges for a tailored data and metadata management. For instance, work activities in product development projects and their relationships to data are not represented explicitly in current metadata models. We propose a metadata model that addresses all challenges and provides a connected view on all CAE data, metadata, and work activities of development projects. We validate the feasibility of our metadata model through a prototypical implementation and its application to a real-world use case. This verifies that our metadata model addresses the CAE-specific challenges and this way eases the task of domain experts to exploit relevant data.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-07&engl=0}
}
@inproceedings {INPROC-2020-57,
   author = {Simon Dreher and Peter Reimann and Christoph Gr{\"o}ger},
   title = {{Application Fields and Research Gaps of Process Mining in Manufacturing Companies}},
   booktitle = {Proceedings of INFORMATIK 2020},
   editor = {R. H. Reussner and A. Koziolek and R. Heinrich},
   publisher = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {621--634},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2020},
   keywords = {Process Mining; Application; Production; Manufacturing; SCOR; Literature Review},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {To survive in global competition with increasing cost pressure, manufacturing companies must continuously optimize their manufacturing-related processes. Thereby, process mining constitutes an important data-driven approach to gain a profound understanding of the actual processes and to identify optimization potentials by applying data mining and machine learning techniques on event data. However, there is little knowledge about the feasibility and usefulness of process mining specifically in manufacturing companies. Hence, this paper provides an overview of potential applications of process mining for the analysis of manufacturing-related processes. We conduct a systematic literature review, classify relevant articles according to the Supply-Chain-Operations-Reference-Model (SCOR-model), identify research gaps, such as domain-specific challenges regarding unstructured, cascaded and non-linear processes or heterogeneous data sources, and give practitioners inspiration which manufacturing-related processes can be analyzed by process mining techniques.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-57&engl=0}
}
@inproceedings{INPROC-2020-56,
   author = {Christian Weber and Peter Reimann},
   title = {{MMP - A Platform to Manage Machine Learning Models in Industry 4.0 Environments}},
   booktitle = {Proceedings of the IEEE 24th International Enterprise Distributed Object Computing Workshop (EDOCW)},
   address = {Eindhoven, The Netherlands},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   type = {Demonstration},
   month = {Juli},
   year = {2020},
   language = {Englisch},
   keywords = {Model Management; Machine Learning; Collaborative Data Science},
   cr-category = {H.3.4 Information Storage and Retrieval Systems and Software},
   abstract = {In manufacturing environments, machine learning models are being built for several use cases, such as predictive maintenance and product quality control. In this context, the various manufacturing processes, machines, and product variants make it necessary to create and use lots of different machine learning models. This calls for a software system that is able to manage all these diverse machine learning models and associated metadata. However, current model management systems do not associate models with business and domain context to provide non-expert users with tailored functions for model search and discovery. Moreover, none of the existing systems provides a comprehensive overview of all models within an organization. In our demonstration, we present the MMP, our model management platform that addresses these issues. The MMP provides a model metadata extractor, a model registry, and a context manager to store model metadata in a central metadata store. On top of this, the MMP provides frontend components that offer the above-mentioned functionalities. In our demonstration, we show two scenarios for model management in Industry 4.0 environments that illustrate the novel functionalities of the MMP. We demonstrate to the audience how the platform and its metadata, linking models to their business and domain context, help non-expert users to search and discover models. Furthermore, we show how to use MMP's powerful visualizations for model reporting, such as a dashboard and a model landscape view.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-56&engl=0}
}
@inproceedings {INPROC-2020-38,
   author = {Alejandro Villanueva Zacarias and Rachaa Ghabri and Peter Reimann},
   title = {{AD4ML: Axiomatic Design to Specify Machine Learning Solutions for Manufacturing}},
   booktitle = {Proceedings of the 21st International Conference on Information Reuse and Integration for Data Science},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2020},
   keywords = {manufacturing; machine-learning; design},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Machine learning is increasingly adopted in manufacturing use cases, e.g., for fault detection in a production line. Each new use case requires developing its own machine learning (ML) solution. A ML solution integrates different software components to read, process, and analyze all use case data, as well as to finally generate the output that domain experts need for their decision-making. The process to design a system specification for a ML solution is not straight-forward. It entails two types of complexity: (1) The technical complexity of selecting combinations of ML algorithms and software components that suit a use case; (2) the organizational complexity of integrating different requirements from a multidisciplinary team of, e.g., domain experts, data scientists, and IT specialists. In this paper, we propose several adaptations to Axiomatic Design in order to design ML solution specifications that handle these complexities. We call this Axiomatic Design for Machine Learning (AD4ML). We apply AD4ML to specify a ML solution for a fault detection use case and discuss to what extent our approach conquers the above-mentioned complexities. We also discuss how AD4ML facilitates the agile design of ML solutions.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-38&engl=0}
}
@inproceedings{INPROC-2020-32,
   author = {Vitali Hirsch and Peter Reimann and Bernhard Mitschang},
   title = {{Exploiting Domain Knowledge to Address Multi-Class Imbalance and a Heterogeneous Feature Space in Classification Tasks for Manufacturing Data}},
   booktitle = {Proceedings of the 46th International Conference on Very Large Databases (VLDB)},
   editor = {Magdalena Balazinska and Xiaofang Zhou},
   series = {Proceedings of the VLDB Endowment},
   volume = {13(12)},
   publisher = {ACM Digital Library},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2020},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   abstract = {Classification techniques are increasingly adopted for quality control in manufacturing, e. g., to help domain experts identify the cause of quality issues of defective products. However, real-world data often imply a set of analytical challenges, which lead to a reduced classification performance. Major challenges are a high degree of multi-class imbalance within data and a heterogeneous feature space that arises from the variety of underlying products. This paper considers such a challenging use case in the area of End-of-Line testing, i. e., the final functional test of complex products. Existing solutions to classification or data pre-processing only address individual analytical challenges in isolation. We propose a novel classification system that explicitly addresses both challenges of multi-class imbalance and a heterogeneous feature space together. As main contribution, this system exploits domain knowledge to systematically prepare the training data. Based on an experimental evaluation on real-world data, we show that our classification system outperforms any other classification technique in terms of accuracy. Furthermore, we can reduce the amount of rework required to solve a quality issue of a product.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-32&engl=0}
}
@inproceedings {INPROC-2020-31,
   author = {Yannick Wilhelm and Peter Reimann and Wolfgang Gauchel and Bernhard Mitschang},
   title = {{Overview on Hybrid Approaches to Fault Detection and Diagnosis: Combining Data-driven, Physics-based and Knowledge-based Models}},
   booktitle = {Procedia CIRP: Proceedings of the 14th CIRP Conference on Intelligent Computation in Manufacturing Engineering (CIRP ICME)},
   publisher = {Elsevier BV},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2020},
   keywords = {Fault Detection; Fault Diagnosis; Hybrid Methods; Diagnostics and Maintenance; Knowledge-driven Methods; Machine Learning},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications,     I.2.1 Applications and Expert Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In this paper, we review hybrid approaches for fault detection and fault diagnosis (FDD) that combine data-driven analysis with physics-based and knowledge-based models to overcome a lack of data and to increase the FDD accuracy. We categorize these hybrid approaches according to the steps of an extended common workflow for FDD. This gives practitioners indications of which kind of hybrid FDD approach they can use in their application.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-31&engl=0}
}
@inproceedings {INPROC-2020-20,
   author = {Yannick Wilhelm and Ulf Schreier and Peter Reimann and Bernhard Mitschang and Holger Ziekow},
   title = {{Data Science Approaches to Quality Control in Manufacturing: A Review of Problems, Challenges and Architecture}},
   booktitle = {Springer Proceedings Series Communications in Computer and Information Science (CCIS)},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2020},
   keywords = {Data Science; Machine Learning; Quality Control; Challenges; Functional Architecture},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Manufacturing environments are characterized by non-stationary processes, constantly varying conditions, complex process interdependencies, and a high number of product variants. These and other aspects pose several challenges for common machine learning algorithms to achieve reliable and accurate predictions. This overview and vision paper provides a comprehensive list of common problems and challenges for data science approaches to quality control in manufacturing. We have derived these problems and challenges by inspecting three real-world use cases in the field of product quality control and via a comprehensive literature study. We furthermore associate the identified problems and challenges to individual layers and components of a functional setup, as it can be found in manufacturing environments today. Additionally, we extend and revise this functional setup and this way propose our vision of a future functional software architecture. This functional architecture represents a visionary blueprint for solutions that are able to address all challenges for data science approaches in manufacturing quality control.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-20&engl=0}
}
@inproceedings {INPROC-2020-19,
   author = {Christian Weber and Pascal Hirmer and Peter Reimann},
   title = {{A Model Management Platform for Industry 4.0 - Enabling Management of Machine Learning Models in Manufacturing Environments}},
   booktitle = {Proceedings of the 23rd International Conference on Business Information Systems (BIS)},
   editor = {Witold Abramowicz and Rainer Alt and Gary Klein and Adrian Paschke and Kurt Sandkuhl},
   publisher = {Springer International Publishing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Business Information Processing},
   type = {Konferenz-Beitrag},
   month = {November},
   year = {2020},
   issn = {1865-1348},
   keywords = {Model Management; Machine Learning; Metadata Tracking},
   language = {Englisch},
   cr-category = {H.3.4 Information Storage and Retrieval Systems and Software},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Industry 4.0 use cases such as predictive maintenance and product quality control make it necessary to create, use and maintain a multitude of different machine learning models. In this setting, model management systems help to organize models. However, concepts for model management systems currently focus on data scientists, but do not support non-expert users such as domain experts and business analysts. Thus, it is difficult for them to reuse existing models for their use cases. In this paper, we address these challenges and present an architecture, a metadata schema and a corresponding model management platform.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-19&engl=0}
}
@inproceedings {INPROC-2020-18,
   author = {Julian Ziegler and Peter Reimann and Florian Keller and Bernhard Mitschang},
   title = {{A Graph-based Approach to Manage CAE Data in a Data Lake}},
   booktitle = {Procedia CIRP: Proceedings of the 53rd CIRP Conference on Manufacturing Systems (CIRP CMS 2020)},
   publisher = {Elsevier},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2020},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Computer-aided engineering (CAE) applications generate vast quantities of heterogeneous data. Domain experts often fail to explore and analyze these data, because they are not integrated across different applications. Existing data management solutions are rather tailored to scientific applications. In our approach, we tackle this issue by combining a data lake solution with graph-based metadata management. This provides a holistic view of all CAE data and of the data-generating applications in one interconnected structure. Based on a prototypical implementation, we discuss how this eases the task of domain experts to explore and extract data for further analyses.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-18&engl=0}
}
@inproceedings {INPROC-2020-17,
   author = {Vitali Hirsch and Peter Reimann and Bernhard Mitschang},
   title = {{Incorporating Economic Aspects into Recommendation Ranking to Reduce Failure Costs}},
   booktitle = {Procedia CIRP: Proceedings of the 53rd CIRP Conference on Manufacturing Systems (CIRP CMS 2020)},
   publisher = {Elsevier},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2020},
   keywords = {decision support; predictive analytics; quality control; End-of-Line testing; classification; fault isolation; failure costs},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Machine learning approaches for manufacturing usually offer recommendation lists, e.g., to support humans in fault diagnosis. For instance, if a product does not pass the final check after the assembly, a recommendation list may contain likely faulty product components to be replaced. Thereby, the list ranks these components using their probabilities. However, these probabilities often differ marginally, while economic impacts, e.g., the costs for replacing components, differ significantly. We address this issue by proposing an approach that incorporates costs to re-rank a list. Our evaluation shows that this approach reduces fault-related costs when using recommendation lists to support human labor.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-17&engl=0}
}
@inproceedings {INPROC-2020-06,
   author = {Cornelia Kiefer and Peter Reimann and Bernhard Mitschang},
   title = {{Prevent Low-Quality Analytics by Automatic Selection of the Best-Fitting Training Data}},
   booktitle = {Proceedings of the 53rd Hawaii International Conference on System Sciences (HICSS)},
   address = {Maui, Hawaii, USA},
   publisher = {Online},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1036--1045},
   type = {Konferenz-Beitrag},
   month = {Januar},
   year = {2020},
   isbn = {978-0-9981331-3-3},
   keywords = {data quality; domain-specific data analysis; text analysis; text similarity; training data},
   language = {Englisch},
   cr-category = {I.2.7 Natural Language Processing},
   ee = {https://scholarspace.manoa.hawaii.edu/bitstream/10125/63868/0103.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data analysis pipelines consist of a sequence of various analysis tools. Most of these tools are based on supervised machine learning techniques and thus rely on labeled training data. Selecting appropriate training data has a crucial impact on analytics quality. Yet, most of the times, domain experts who construct analysis pipelines neglect the task of selecting appropriate training data. They rely on default training data sets, e.g., since they do not know which other training data sets exist and what they are used for. Yet, default training data sets may be very different from the domain-specific input data that is to be analyzed, leading to low-quality results. Moreover, these input data sets are usually unlabeled. Thus, information on analytics quality is not measurable with evaluation metrics. Our contribution comprises a method that (1) indicates the expected quality to the domain expert while constructing the analysis pipeline, without need for labels and (2) automatically selects the best-fitting training data. It is based on a measurement of the similarity between input and training data. In our evaluation, we consider the part-of-speech tagger tool and show that Latent Semantic Analysis (LSA) and Cosine Similarity are suited as indicators for the quality of analysis results and as basis for an automatic selection of the best-fitting training data.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-06&engl=0}
}
@inproceedings {INPROC-2019-32,
   author = {Vitali Hirsch and Peter Reimann and Bernhard Mitschang},
   title = {{Data-Driven Fault Diagnosis in End-of-Line Testing of Complex Products}},
   booktitle = {Proceedings of the 6th IEEE International Conference on Data Science and Advanced Analytics (DSAA 2019), Washington, D.C., USA},
   publisher = {IEEE Xplore},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2019},
   keywords = {decision support; classification; ensembles; automotive; fault diagnosis; quality management; sampling},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Machine learning approaches may support various use cases in the manufacturing industry. However, these approaches often do not address the inherent characteristics of the real manufacturing data at hand. In fact, real data impose analytical challenges that have a strong influence on the performance and suitability of machine learning methods. This paper considers such a challenging use case in the area of End-of-Line testing, i.e., the final functional check of complex products after the whole assembly line. Here, classification approaches may be used to support quality engineers in identifying faulty components of defective products. For this, we discuss relevant data sources and their characteristics, and we derive the resulting analytical challenges. We have identified a set of sophisticated data-driven methods that may be suitable to our use case at first glance, e.g., methods based on ensemble learning or sampling. The major contribution of this paper is a thorough comparative study of these methods to identify whether they are able to cope with the analytical challenges. This comprises the discussion of both fundamental theoretical aspects and major results of detailed experiments we have performed on the real data of our use case.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-32&engl=0}
}
@inproceedings {INPROC-2019-16,
   author = {Marco Spie{\ss} and Peter Reimann},
   title = {{Angepasstes Item Set Mining zur gezielten Steuerung von Bauteilen in der Serienfertigung von Fahrzeugen}},
   booktitle = {Tagungsband der 18. Konferenz Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2019)},
   publisher = {Gesellschaft f{\"u}r Informatik (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {119--128},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2019},
   language = {Deutsch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Qualit{\"a}tsprobleme im Bereich Fahrzeugbau k{\"o}nnen nicht nur zum Imageverlust des Unternehmens f{\"u}hren, sondern auch mit entsprechend hohen Kosten einhergehen. Wird ein Bauteil als Verursacher eines Qualit{\"a}tsproblems identifiziert, muss dessen Verbau gestoppt werden. Mit einer Datenanalyse kann herausgefunden werden, welche Fahrzeugkonfigurationen Probleme mit diesem fehlerverursachenden Bauteil haben. Im Rahmen der dom{\"a}nenspezifischen Problemstellung wird in diesem Beitrag die Anwendbarkeit von Standardalgorithmen aus dem Bereich Data-Mining untersucht. Da die Analyseergebnisse auf Standardausstattungen hinweisen, sind diese nicht zielf{\"u}hrend. F{\"u}r dieses Businessproblem von Fahrzeugherstellern haben wir einen Data-Mining Algorithmus entwickelt, der das Vorgehen des Item Set Mining der Assoziationsanalyse an das dom{\"a}nenspezifische Problem anpasst. Er unterscheidet sich zum klassischen Apriori-Algorithmus in der Beschneidung des Ergebnisraumes sowie in der nachfolgenden Aufbereitung und Verwendungsweise der Item Sets. Der Algorithmus ist allgemeing{\"u}ltig f{\"u}r alle Fahrzeughersteller anwendbar. Die Ergebnisse sind anhand eines realen Anwendungsfalls evaluiert worden, bei dem durch die Anwendung unseres Algorithmus 87\% der Feldausf{\"a}lle verhindert werden k{\"o}nnen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-16&engl=0}
}
@inproceedings {INPROC-2019-10,
   author = {Christian Weber and Pascal Hirmer and Peter Reimann and Holger Schwarz},
   title = {{A New Process Model for the Comprehensive Management of Machine Learning Models}},
   booktitle = {Proceedings of the 21st International Conference on Enterprise Information Systems (ICEIS); Heraklion, Crete, Greece, May 3-5, 2019},
   editor = {Joaquim Filipe and Michal Smialek and Alexander Brodsky and Slimane Hammoudi},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {415--422},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2019},
   isbn = {978-989-758-372-8},
   doi = {10.5220/0007725304150422},
   keywords = {Model Management; Machine Learning; Analytics Process},
   language = {Englisch},
   cr-category = {I.2 Artificial Intelligence},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The management of machine learning models is an extremely challenging task. Hundreds of prototypical models are being built and just a few are mature enough to be deployed into operational enterprise information systems. The lifecycle of a model includes an experimental phase in which a model is planned, built and tested. After that, the model enters the operational phase that includes deploying, using, and retiring it. The experimental phase is well known through established process models like CRISP-DM or KDD. However, these models do not detail on the interaction between the experimental and the operational phase of machine learning models. In this paper, we provide a new process model to show the interaction points of the experimental and operational phase of a machine learning model. For each step of our process, we discuss according functions which are relevant to managing machine learning models.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-10&engl=0}
}
@inproceedings {INPROC-2019-08,
   author = {Cornelia Kiefer and Peter Reimann and Bernhard Mitschang},
   title = {{A Hybrid Information Extraction Approach Exploiting Structured Data Within a Text Mining Process}},
   booktitle = {18. Fachtagung des GI-Fachbereichs ,,Datenbanken und Informationssysteme'' (DBIS), 4.-8. M{\"a}rz 2019, Rostock, Germany, Proceedings.},
   editor = {Torsten Grust and Felix Naumann and Alexander B{\"o}hm and Wolfgang Lehner and Theo H{\"a}rder and Erhard et al. Rahm},
   address = {Bonn},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {149--168},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2019},
   keywords = {information extraction; clustering; text mining; free text fields},
   language = {Englisch},
   cr-category = {I.2.7 Natural Language Processing},
   ee = {https://doi.org/10.18420/btw2019-10},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Many data sets encompass structured data fields with embedded free text fields. The text fields allow customers and workers to input information which cannot be encoded in structured fields. Several approaches use structured and unstructured data in isolated analyses. The result of isolated mining of structured data fields misses crucial information encoded in free text. The result of isolated text mining often mainly repeats information already available from structured data. The actual information gain of isolated text mining is thus limited. The main drawback of both isolated approaches is that they may miss crucial information. The hybrid information extraction approach suggested in this paper addresses this issue. Instead of extracting information that in large parts was already available beforehand, it extracts new, valuable information from free texts. Our solution exploits results of analyzing structured data within the text mining process, i.e., structured information guides and improves the information extraction process on textual data. Our main contributions comprise the description of the concept of hybrid information extraction as well as a prototypical implementation and an evaluation with two real-world data sets from aftersales and production with English and German free text fields.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-08&engl=0}
}
@inproceedings {INPROC-2018-54,
   author = {Alejandro Villanueva Zacarias and Peter Reimann and Bernhard Mitschang},
   title = {{A Framework to Guide the Selection and Configuration of Machine-Learning-based Data Analytics Solutions in Manufacturing}},
   booktitle = {Proceedings of the 51st CIRP Conference on Manufacturing Systems (CIRP CMS 2018)},
   publisher = {Elsevier BV},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {153--158},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2018},
   keywords = {data analytics; machine learning; learning algorithms; generative design},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Users in manufacturing willing to apply machine-learning-based (ML-based) data analytics face challenges related to data quality or to the selection and configuration of proper ML algorithms. Current approaches are either purely empirical or reliant on technical data. This makes understanding and comparing candidate solutions difficult, and also ignores the way it impacts the real application problem. In this paper, we propose a framework to generate analytics solutions based on a systematic profiling of all aspects involved. With it, users can visually and systematically explore relevant alternatives for their specific scenario, and obtain recommendations in terms of costs, productivity, results quality, or execution time.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2018-54&engl=0}
}
@inproceedings {INPROC-2018-53,
   author = {Vitali Hirsch and Peter Reimann and Oliver Kirn and Bernhard Mitschang},
   title = {{Analytical Approach to Support Fault Diagnosis and Quality Control in End-Of-Line Testing}},
   booktitle = {Proceedings of the 51st CIRP Conference on Manufacturing Systems (CIRP CMS 2018)},
   publisher = {Elsevier BV},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1333--1338},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2018},
   keywords = {Analytics; decision support; recommendation system; fault diagnosis; quality control},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Operators in end-of-line testing of assembly lines often try out multiple solutions until they can solve a product quality issue. This calls for a decision support system based on data analytics that effectively helps operators in fault diagnosis and quality control. However, existing analytical approaches do not consider the specific data characteristics being prevalent in the area of End-of-Line (EoL) testing. We address this issue by proposing an analytical approach that is tailored to EoL testing. We show how to implement this approach in a real-world use case of a large automotive manufacturer, which reveals its potential to reduce unnecessary rework.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2018-53&engl=0}
}
@inproceedings {INPROC-2015-33,
   author = {Pascal Hirmer and Peter Reimann and Matthias Wieland and Bernhard Mitschang},
   title = {{Extended Techniques for Flexible Modeling and Execution of Data Mashups}},
   booktitle = {Proceedings of the 4th International Conference on Data Management Technologies and Applications (DATA)},
   editor = {Markus Helfert and Andreas Holzinger and Orlando Belo and Chiara Francalanci},
   address = {Colmar},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {111--122},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2015},
   isbn = {978-989-758-103-8},
   keywords = {Data Mashups, Ad-hoc Integration, Patterns, Data Flow},
   language = {Englisch},
   cr-category = {E.1 Data Structures,     E.5 Data Files},
   contact = {pascal.hirmer@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Today, a multitude of highly-connected applications and information systems hold, consume and produce huge amounts of heterogeneous data. The overall amount of data is even expected to dramatically increase in the future. In order to conduct, e.g., data analysis, visualizations or other value-adding scenarios, it is necessary to integrate specific, relevant parts of data into a common source. Due to oftentimes changing environments and dynamic requests, this integration has to support ad-hoc and flexible data processing capabilities. Furthermore, an iterative and explorative trial-and-error integration based on different data sources has to be possible. To cope with these requirements, several data mashup platforms have been developed in the past. However, existing solutions are mostly non-extensible, monolithic systems or applications with many limitations regarding the mentioned requirements. In this paper, we introduce an approach that copes with these issues (i) by the introduction of patterns to enable decoupling from implementation details, (ii) by a cloud-ready approach to enable availability and scalability, and (iii) by a high degree of flexibility and extensibility that enables the integration of heterogeneous data as well as dynamic (un-)tethering of data sources. We evaluate our approach using runtime measurements of our prototypical implementation.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2015-33&engl=0}
}
@inproceedings {INPROC-2014-76,
   author = {Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{A Pattern Approach to Conquer the Data Complexity in Simulation Workflow Design}},
   booktitle = {Proceedings of OnTheMove Federated Conferences and Workshops (OTM), 22nd International Conference on Cooperative Information Systems (CoopIS 2014)},
   editor = {Meersman, R. and others},
   address = {Amantea, Italy},
   publisher = {Springer Berlin Heidelberg},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {LNCS},
   volume = {8841},
   pages = {21--38},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2014},
   keywords = {Data Provisioning; Data Management Patterns; SIMPL; Simulation Workflow; Simulation Workflow Design; Workflow; Workflow Design},
   language = {Englisch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Scientific workflows may be used to enable the collaborative implementation of scientific applications across various domains. Since each domain has its own requirements and solutions for data handling, such workflows often have to deal with a highly heterogeneous data environment. This results in an increased complexity of workflow design. As scientists typically design their scientific workflows on their own, this complexity hinders them to concentrate on their core issue, namely the experiments, analyses, or simulations they conduct. In this paper, we present a novel approach to a pattern-based abstraction support for the complex data management in simulation workflows that goes beyond related work in similar research areas. A pattern hierarchy with different abstraction levels enables a separation of concerns according to the skills of different persons involved in workflow design. The goal is that scientists are no longer obliged to specify low-level details of data management in their workflows. We discuss the advantages of this approach and show to what extent it reduces the complexity of simulation workflow design. Furthermore, we illustrate how to map patterns onto executable workflows. Based on a prototypical implementation of three real-world simulations, we evaluate our approach according to relevant requirements.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-76&engl=0}
}
@inproceedings {INPROC-2014-52,
   author = {Peter Reimann and Tim Waizenegger and Matthias Wieland and Holger Schwarz},
   title = {{Datenmanagement in der Cloud f{\"u}r den Bereich Simulationen und Wissenschaftliches Rechnen}},
   booktitle = {Proceedings des 2. Workshop Data Management in the Cloud auf der 44. Jahrestagung der Gesellschaft f{\"u}r Informatik e.V. (GI)},
   editor = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   address = {Stuttgart, Deutschland},
   publisher = {Lecture Notes in Informatics (LNI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Workshop-Beitrag},
   month = {September},
   year = {2014},
   language = {Deutsch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {F{\"u}r Organisationen, die Simulationen nicht als ihr Kerngesch{\"a}ft verstehen und sie daher eher sporadisch durchf{\"u}hren, lohnt sich der Betrieb einer eigenen Recheninfrastruktur nur selten. Dies betrifft z.B. kleine und mittlere Unternehmen sowie einige wissenschaftliche Institutionen. Besserung k{\"o}nnen {\"o}ffentliche Cloud-Infrastrukturen als Plattform f{\"u}r die Ausf{\"u}hrung von Simulationen verschaffen. Das Datenmanagement in der Cloud ist aber speziell f{\"u}r den Bereich Simulationen noch weitgehend unerforscht. In diesem Beitrag identifizieren wir daher noch offene Fragestellungen bzgl. des Datenmanagements von Simulationen in der Cloud. Dies betrifft vor allem die Datenbereitstellung und inwieweit nutzer- und simulationsspezifische Anforderungen an das Datenmanagement in der Cloud eingehalten werden k{\"o}nnen. Wir untersuchen Technologien, welche sich diesen Fragestellungen widmen, und diskutieren, ob und wie sie in der Cloud sowie f{\"u}r Simulationen einsetzbar sind. Weiterhin skizzieren wir wichtige zuk{\"u}nftige Forschungsthemen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-52&engl=0}
}
@inproceedings {INPROC-2014-51,
   author = {Peter Reimann and Holger Schwarz},
   title = {{Simulation Workflow Design Tailor-Made for Scientists}},
   booktitle = {Proceedings of the 26th International Conference on Scientific and Statistical Database Management},
   address = {Aalborg, Denmark},
   publisher = {ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Demonstration},
   month = {Juni},
   year = {2014},
   keywords = {Data Provisioning; Data Management Patterns; Simulation Workflow; Simulation Workflow Design},
   language = {Englisch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Scientific workflows have to deal with highly heterogeneous data environments. In particular, they have to carry out complex data provisioning tasks that filter and transform heterogeneous input data in such a way that underlying tools or services can ingest them. This results in a high complexity of workflow design. Scientists often want to design their workflows on their own, but usually do not have the necessary skills to cope with this complexity. Therefore, we have developed a pattern-based approach to workflow design, thereby mainly focusing on workflows that realize numeric simulations. This approach removes the burden from scientists to specify low-level details of data provisioning. In this demonstration, we apply a prototype implementation of our approach to various use cases and show how it makes simulation workflow design tailor-made for scientists.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-51&engl=0}
}
@inproceedings {INPROC-2014-50,
   author = {Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Patterns to Alleviate the Design of Scientific Workflows Exemplified by a Bone Simulation}},
   booktitle = {Proceedings of the 26th International Conference on Scientific and Statistical Database Management},
   address = {Aalborg, Denmark},
   publisher = {ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juni},
   year = {2014},
   keywords = {Data Provisioning; Data Management Patterns; Workflow; SIMPL; Simulation Workflow; BPEL; WS-BPEL},
   language = {Englisch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Scientific workflows often have to process huge data sets in a multiplicity of data formats. For that purpose, they typically embed complex data provisioning tasks that transform these heterogeneous data into formats the underlying tools or services can handle. This results in an increased complexity of workflow design. As scientists typically design their scientific workflows on their own, this complexity hinders them to concentrate on their core issue, namely the experiments, analyses, or simulations they conduct. In this paper, we present the core idea of a pattern-based approach to alleviate the design of scientific workflows. This approach is particularly targeted at the needs of scientists. We exemplify and assess the pattern-based design approach by applying it to a complex scientific workflow realizing a real-world simulation of structure changes in bones.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-50&engl=0}
}
@inproceedings {INPROC-2013-38,
   author = {Stefan Silcher and Jan K{\"o}nigsberger and Peter Reimann and Bernhard Mitschang},
   title = {{Cooperative service registries for the service-based Product Lifecycle Management architecture}},
   booktitle = {Proceedings of the 17th IEEE International Conference on Computer Supported Cooperative Work in Design (CSCWD '13)},
   publisher = {IEEE Xplore},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {439--446},
   type = {Konferenz-Beitrag},
   month = {Juni},
   year = {2013},
   isbn = {978-1-4673-6083-8},
   doi = {10.1109/CSCWD.2013.6581003},
   keywords = {Collaborative Product Lifecycle Management; Cooperative Service Registries; Enterprise Service Bus; Service-oriented Architecture},
   language = {Englisch},
   cr-category = {D.2.11 Software Engineering Software Architectures,     D.2.13 Software Engineering Reusable Software,     H.3.4 Information Storage and Retrieval Systems and Software,     J.1 Administration Data Processing},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Product Lifecycle Management (PLM) comprises many different tasks across multiple domains, such as product development and production. Thus, multidisciplinary engineering teams have to collaborate to successfully design and produce products. Nowadays, engineers are supported with many software solutions, which are tailored to the work of each engineer. The problem is the missing or bad integration between these IT solutions, which leads to noncontinuous processes and an insufficient cooperation. The Service-oriented Architecture (SOA) supports the needed flexible integration of applications based on services and moreover an automation and integration of processes via workflows. In previous work, we proposed a service-oriented PLM architecture that provides these benefits and supports continuous processes. Thereby, services of different domains and phases of the product life cycle need to collaborate in a distributed fashion. In this paper, we systematically identify, define and rate representative models for the management of corresponding distributed service registries, which enable an efficient collaboration of services. Based on a prototypical implementation of the best-rated model in a layout redesign scenario, we assess our approach for its suitability in PLM. The selected service registry model provides transparent access to all services of different domains and shows the ease of integrating new applications into the product life cycle. It thereby enables an improved cooperation of engineers across various domains to define cross-domain processes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2013-38&engl=0}
}
@inproceedings {INPROC-2013-02,
   author = {Peter Reimann and Holger Schwarz},
   title = {{Datenmanagementpatterns in Simulationsworkflows}},
   booktitle = {Proceedings der 15. GI-Fachtagung Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2013)},
   editor = {Gesellschaft f{\"u}r Informatik (GI)},
   address = {Magdeburg},
   publisher = {Lecture Notes in Informatics (LNI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Gesellschaft f{\"u}r Informatik (GI)},
   pages = {279--293},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2013},
   keywords = {Datenbereitstellung; Datenmanagementpatterns; Workflow; SIMPL; Simulationsworkflow; BPEL; WS-BPEL},
   language = {Deutsch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Simulationsworkflows m{\"u}ssen oftmals gro{\ss}e Datenmengen verarbeiten, die in einer Vielzahl propriet{\"a}rer Formate vorliegen. Damit diese Daten von den im Workflow eingebundenen Programmen und Diensten verarbeitet werden k{\"o}nnen, m{\"u}ssen sie in passende Formate transformiert werden. Dies erh{\"o}ht die Komplexit{\"a}t der Workflowmodellierung, welche i.d.R. durch die Wissenschaftler selbst erfolgt. Dadurch k{\"o}nnen sich diese weniger auf den Kern der eigentlichen Simulation konzentrieren. Zur Behebung dieses Defizits schlagen wir einen Ansatz vor, mit dem die Aktivit{\"a}ten zur Datenbereitstellung in Simulationsabl{\"a}ufen abstrakt modelliert werden k{\"o}nnen. Wissenschaftler sollen keine Implementierungsdetails, sondern lediglich die Kernaspekte der Datenbereitstellung in Form von Patterns beschreiben. Die Spezifikation der Patterns soll dabei m{\"o}glichst in der Sprache der mathematischen Simulationsmodelle erfolgen, mit denen Wissenschaftler vertraut sind. Eine Erweiterung des Workflowsystems bildet die Patterns automatisch auf ausf{\"u}hrbare Workflowfragmente ab, welche die Datenbereitstellung umsetzen. Dies alles reduziert die Komplexit{\"a}t der Modellierung von Simulationsworkflows und erh{\"o}ht die Produktivit{\"a}t der Wissenschaftler.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2013-02&engl=0}
}
@inproceedings {INPROC-2011-42,
   author = {Jorge Minguez and Peter Reimann and Sema Zor},
   title = {{Event-driven Business Process Management in Engineer-to-Order Supply Chains}},
   booktitle = {Proceedings of the 15th International Conference on Computer Supported Cooperative Work in Design},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--8},
   type = {Konferenz-Beitrag},
   month = {Juni},
   year = {2011},
   keywords = {Event-driven Architecture; Service-oriented Architecture; SOA; EDA; Engineer-to-Order; ETO; Supply chain},
   language = {Englisch},
   cr-category = {D.2.11 Software Engineering Software Architectures,     D.2.13 Software Engineering Reusable Software},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {Integration efforts in today's manufacturing environments tend to enable service-based communication interfaces between enterprise and manufacturing systems. Constantly changing business conditions demand a high level of flexibility in business processes as well as an adaptive and fully interoperable IT infrastructure. The principles of reusability and loosely-coupled services have driven Service Oriented Architecture (SOA) to become the most used paradigm for software design at the business level. In a manufacturing environment, event-driven architectures (EDA) are often employed for managing information flows across different production systems. The timely propagation of business-relevant events is a fundamental requirement in Engineer-to-Order (ETO) enterprises, which require a high level of transparency in their supply chains. Agility is one of the top priorities for ETO manufacturers in order to react to turbulent scenarios. Therefore, the main challenge for ETO supply chains is to identify and propagate events across the ETO logistics network and integrate these into the manufacturer business processes. We present how an existing service-oriented integration platform for manufacturing can be used to fill the gap between EDA-based manufacturing environments of an ETO supply chain and SOA-based manufacturer business processes. In this paper, we discuss the benefits of the Business Process Execution Language (BPEL) as vehicle for this integration. The adoption of BPEL will enable an efficient and effective reaction to turbulent manufacturing scenarios in an ETO supply chain.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2011-42&engl=0}
}
@inproceedings {INPROC-2011-07,
   author = {Peter Reimann and Michael Reiter and Holger Schwarz and Dimka Karastoyanova and Frank Leymann},
   title = {{SIMPL - A Framework for Accessing External Data in Simulation Workflows}},
   booktitle = {Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2011), 14. Fachtagung des GI-Fachbereichs {\glqq}Datenbanken und Informationssysteme{\grqq} (DBIS), Proceedings, 02.-04. M{\"a}rz 2011, Kaiserslautern, Germany},
   editor = {Gesellschaft f{\"u}r Informatik (GI)},
   publisher = {Lecture Notes in Informatics (LNI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Series of the Gesellschaft f{\"u}r Informatik (GI)},
   volume = {180},
   pages = {534--553},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2011},
   isbn = {978-3-88579-274-1},
   keywords = {Data Provisioning; Workflow; Scientific Workflow; Simulation Workflow; BPEL; WS-BPEL; SIMPL},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {Adequate data management and data provisioning are among the most important topics to cope with the information explosion intrinsically associated with simulation applications. Today, data exchange with and between simulation applications is mainly accomplished in a file-style manner. These files show proprietary formats and have to be transformed according to the specific needs of simulation applications. Lots of effort has to be spent to find appropriate data sources and to specify and implement data transformations. In this paper, we present SIMPL -- an extensible framework that provides a generic and consolidated abstraction for data management and data provisioning in simulation workflows. We introduce extensions to workflow languages and show how they are used to model the data provisioning for simulation workflows based on data management patterns. Furthermore, we show how the framework supports a uniform access to arbitrary external data in such workflows. This removes the burden from engineers and scientists to specify low-level details of data management for their simulation applications and thus boosts their productivity.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2011-07&engl=0}
}
@article {ART-2023-04,
   author = {Alejandro Gabriel Villanueva Zacarias and Peter Reimann and Christian Weber and Bernhard Mitschang},
   title = {{AssistML: An Approach to Manage, Recommend and Reuse ML Solutions}},
   journal = {International Journal of Data Science and Analytics (JDSA)},
   publisher = {Springer Nature},
   type = {Artikel in Zeitschrift},
   month = {Juli},
   year = {2023},
   keywords = {Meta-learning; Machine learning; AutoML; Metadata; Recommender systems},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The adoption of machine learning (ML) in organizations is characterized by the use of multiple ML software components. When building ML systems out of these software components, citizen data scientists face practical requirements which go beyond the known challenges of ML, e.g., data engineering or parameter optimization. They are expected to quickly identify ML system options that strike a suitable trade-off across multiple performance criteria. These options also need to be understandable for non-technical users. Addressing these practical requirements represents a problem for citizen data scientists with limited ML experience. This calls for a concept to help them identify suitable ML software combinations. Related work, e.g., AutoML systems, are not responsive enough or cannot balance different performance criteria. This paper explains how AssistML, a novel concept to recommend ML solutions, i.e., software systems with ML models, can be used as an alternative for predictive use cases. Our concept collects and preprocesses metadata of existing ML solutions to quickly identify the ML solutions that can be reused in a new use case. We implement AssistML and evaluate it with two exemplary use cases. Results show that AssistML can recommend ML solutions in line with users' performance preferences in seconds. Compared to AutoML, AssistML offers citizen data scientists simpler, intuitively explained ML solutions in considerably less time. Moreover, these solutions perform similarly or even better than AutoML models.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-04&engl=0}
}
@article {ART-2023-02,
   author = {Vitali Hirsch and Peter Reimann and Dennis Treder-Tschechlov and Holger Schwarz and Bernhard Mitschang},
   title = {{Exploiting Domain Knowledge to address Class Imbalance and a Heterogeneous Feature Space in Multi-Class Classification}},
   journal = {International Journal on Very Large Data Bases (VLDB-Journal)},
   publisher = {Springer},
   type = {Artikel in Zeitschrift},
   month = {Februar},
   year = {2023},
   keywords = {Classification; Domain knowledge; Multi-class Imbalance; Heterogeneous feature space},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Real-world data of multi-class classification tasks often show complex data characteristics that lead to a reduced classification performance. Major analytical challenges are a high degree of multi-class imbalance within data and a heterogeneous feature space, which increases the number and complexity of class patterns. Existing solutions to classification or data pre- processing only address one of these two challenges in isolation. We propose a novel classification approach that explicitly addresses both challenges of multi-class imbalance and heterogeneous feature space together. As main contribution, this approach exploits domain knowledge in terms of a taxonomy to systematically prepare the training data. Based on an experimental evaluation on both real-world data and several synthetically generated data sets, we show that our approach outperforms any other classification technique in terms of accuracy. Furthermore, it entails considerable practical benefits in real-world use cases, e.g., it reduces rework required in the area of product quality control.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-02&engl=0}
}
@article {ART-2019-22,
   author = {Dimitri Petrik and Mathias Mormul and Peter Reimann},
   title = {{Anforderungen f{\"u}r Zeitreihendatenbanken in der industriellen Edge}},
   journal = {HMD Praxis der Wirtschaftsinformatik},
   publisher = {Springer-Verlag},
   volume = {56},
   pages = {1282--1308},
   type = {Artikel in Zeitschrift},
   month = {Oktober},
   year = {2019},
   doi = {10.1365/s40702-019-00568-9},
   keywords = {Time Series Data; Time Series Database; Industrial IoT; Edge Computing; Defining Requirements; InfluxDB},
   language = {Deutsch},
   cr-category = {E.0 Data General},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Das industrielle Internet der Dinge (iIoT) integriert Informations- und Kommunikationstechnologien in die industriellen Prozesse und erweitert sie durch Echtzeit-Datenanalyse. Eine bedeutende Menge an Daten, die in der industriellen Fertigung generiert werden, sind sensorbasierte Zeitreihendaten, die in regelm{\"a}{\ss}igen Abst{\"a}nden generiert werden und zus{\"a}tzlich zum Sensorwert einen Zeitstempel enthalten. Spezielle Zeitreihen-Datenbanken (TSDB) sind daf{\"u}r ausgelegt, die Zeitreihendaten effizienter zu speichern. Wenn TSDBs in der N{\"a}he der Maschine (in der industriellen Edge) eingesetzt werden, sind Maschinendaten zur {\"U}berwachung zeitkritischer Prozesse aufgrund der niedrigen Latenz schnell verf{\"u}gbar, was die erforderliche Zeit f{\"u}r die Datenverarbeitung reduziert. Bisherige Untersuchungen zu TSDBs sind bei der Auswahl f{\"u}r den Einsatz in der industriellen Edge nur begrenzt hilfreich. Die meisten verf{\"u}gbaren Benchmarks von TSDBs sind performanceorientiert und ber{\"u}cksichtigen nicht die Einschr{\"a}nkungen der industriellen Edge. Wir adressieren diese L{\"u}cke und identifizieren die funktionalen Kriterien f{\"u}r den Einsatz von TSDBs im maschinennahen Umfeld und bilden somit einen qualitativen Anforderungskatalog. Des Weiteren zeigen wir am Beispiel von InfluxDB, wie dieser Katalog verwendet werden kann, mit dem Ziel die Auswahl einer geeigneten TSDB f{\"u}r Sensordaten in der Edge zu unterst{\"u}tzen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-22&engl=0}
}
@article {ART-2019-10,
   author = {Cornelia Kiefer and Peter Reimann and Bernhard Mitschang},
   title = {{QUALM: Ganzheitliche Messung und Verbesserung der Datenqualit{\"a}t in der Textanalyse}},
   journal = {Datenbank-Spektrum},
   publisher = {Springer Verlag},
   pages = {1--12},
   type = {Artikel in Zeitschrift},
   month = {Juni},
   year = {2019},
   doi = {10.1007/s13222-019-00318-7},
   keywords = {Datenqualit{\"a}t; Textanalyse; Text Mining; Trainingsdaten; Semantische Ressourcen},
   language = {Deutsch},
   cr-category = {H.3 Information Storage and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Bestehende Ans{\"a}tze zur Messung und Verbesserung der Qualit{\"a}t von Textdaten in der Textanalyse bringen drei gro{\ss}e Nachteile mit sich. Evaluationsmetriken wie zum Beispiel Accuracy messen die Qualit{\"a}t zwar verl{\"a}sslich, sie (1) sind jedoch auf aufw{\"a}ndig h{\"a}ndisch zu erstellende Goldannotationen angewiesen und (2) geben keine Ansatzpunkte f{\"u}r die Verbesserung der Qualit{\"a}t. Erste dom{\"a}nenspezifische Datenqualit{\"a}tsmethoden f{\"u}r unstrukturierte Textdaten kommen zwar ohne Goldannotationen aus und geben Ansatzpunkte zur Verbesserung der Datenqualit{\"a}t. Diese Methoden wurden jedoch nur f{\"u}r begrenzte Anwendungsgebiete entwickelt und (3) ber{\"u}cksichtigen deshalb nicht die Spezifika vieler Analysetools in Textanalyseprozessen. In dieser Arbeit pr{\"a}sentieren wir hierzu das QUALM-Konzept zum qualitativ hochwertigen Mining von Textdaten (QUALity Mining), das die drei o.g. Nachteile adressiert. Das Ziel von QUALM ist es, die Qualit{\"a}t der Analyseergebnisse, z. B. bzgl. der Accuracy einer Textklassifikation, auf Basis einer Messung und Verbesserung der Datenqualit{\"a}t zu erh{\"o}hen. QUALM bietet hierzu eine Menge an QUALM-Datenqualit{\"a}tsmethoden. QUALM-Indikatoren erfassen die Datenqualit{\"a}t ganzheitlich auf Basis der Passung zwischen den Eingabedaten und den Spezifika der Analysetools, wie den verwendeten Features, Trainingsdaten und semantischen Ressourcen (wie zum Beispiel W{\"o}rterb{\"u}chern oder Taxonomien). Zu jedem Indikator geh{\"o}rt ein passender Modifikator, mit dem sowohl die Daten als auch die Spezifika der Analysetools ver{\"a}ndert werden k{\"o}nnen, um die Datenqualit{\"a}t zu erh{\"o}hen. In einer ersten Evaluation von QUALM zeigen wir f{\"u}r konkrete Analysetools und Datens{\"a}tze, dass die Anwendung der QUALM-Datenqualit{\"a}tsmethoden auch mit einer Erh{\"o}hung der Qualit{\"a}t der Analyseergebnisse im Sinne der Evaluationsmetrik Accuracy einhergeht. 
Die Passung zwischen Eingabedaten und Spezifika der Analysetools wird hierzu mit konkreten QUALM-Modifikatoren erh{\"o}ht, die zum Beispiel Abk{\"u}rzungen aufl{\"o}sen oder automatisch auf Basis von Text{\"a}hnlichkeitsmetriken passende Trainingsdaten vorschlagen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-10&engl=0}
}
@article {ART-2018-06,
   author = {Christian Weber and Matthias Wieland and Peter Reimann},
   title = {{Konzepte zur Datenverarbeitung in Referenzarchitekturen f{\"u}r Industrie 4.0: Konsequenzen bei der Umsetzung einer IT-Architektur}},
   journal = {Datenbank-Spektrum},
   publisher = {Springer Berlin Heidelberg},
   volume = {18},
   number = {1},
   pages = {39--50},
   type = {Artikel in Zeitschrift},
   month = {M{\"a}rz},
   year = {2018},
   issn = {1610-1995},
   doi = {10.1007/s13222-018-0275-z},
   keywords = {Industrie 4.0; Referenzarchitektur; Datenverarbeitung; RAMI4.0; IIRA},
   language = {Deutsch},
   cr-category = {H.4.0 Information Systems Applications General,     J.2 Physical Sciences and Engineering},
   ee = {https://link.springer.com/article/10.1007/s13222-018-0275-z},
   contact = {Senden Sie eine E-Mail an christian.weber@gsame.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {F{\"u}r produzierende Unternehmen stellt die effiziente Verarbeitung gro{\ss}er Datenmengen eine Herausforderung dar. Die Auswahl der richtigen Architekturkonzepte f{\"u}r IT-L{\"o}sungen zur Datenverarbeitung spielt dabei eine wichtige Rolle. Um die IT an den Herausforderungen von Industrie 4.0 auszurichten, stehen Unternehmen verschiedene Referenzarchitekturen internationaler Gremien zur Verf{\"u}gung. Die Hauptbeitr{\"a}ge dieses Artikels haben das Ziel, (i) einen {\"U}berblick {\"u}ber die wichtigsten Referenzarchitekturen f{\"u}r Industrie 4.0 (I4.0) zu geben und (ii) diese unter dem Aspekt der Datenverarbeitung zu untersuchen. Dazu werden die Referenzarchitekturen anhand von Datenverarbeitungsanforderungen f{\"u}r I4.0 betrachtet. Die Untersuchung zeigt, dass die I4.0-Referenzarchitekturen jeweils einen Teilbereich der Anforderungen abdecken und sich die Konzepte gegenseitig erg{\"a}nzen. (iii) Darauf aufbauend werden aus den Datenverarbeitungsanforderungen technische Konsequenzen abgeleitet und Architekturkonzepte f{\"u}r die Realisierung einer IT-Architektur f{\"u}r die Datenverarbeitung vorgestellt. Dadurch wird es IT-Architekten erm{\"o}glicht, einen Vergleich der Referenzarchitekturen hinsichtlich projektbezogener Anforderungen an die Datenverarbeitung vorzunehmen sowie geeignete Architekturentscheidungen zu treffen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2018-06&engl=0}
}
@article {ART-2011-14,
   author = {Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{Design, Implementation, and Evaluation of a Tight Integration of Database and Workflow Engines}},
   journal = {Journal of Information and Data Management},
   editor = {Alberto H. F. Laender and Mirella M. Moro},
   publisher = {SBC - Brazilian Computer Society},
   volume = {2},
   number = {3},
   pages = {353--368},
   type = {Artikel in Zeitschrift},
   month = {Oktober},
   year = {2011},
   issn = {2178-7107},
   keywords = {Data-Intensive Workflow; Improved Local Data Processing; Scientific Workflow; Simulation Workflow},
   language = {Englisch},
   cr-category = {D.2.11 Software Engineering Software Architectures,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Accessing and processing huge amounts of heterogeneous and distributed data are some of the major challenges of data-intensive workflows. Traditionally, the descriptions of such workflows focus on their data flow. Nevertheless, control-flow-oriented workflow languages are increasingly adapted to the needs of data-intensive workflows. This provides a common level of abstraction for both data-intensive workflows and classical orchestration workflows, e.g., business workflows, which then enables a comprehensive optimization across all workflows. However, the problem still remains that workflows described in control-flow-oriented languages tend to be less efficient for data-intensive processes compared to specialized data-flow-oriented approaches. In this paper, we propose a new kind of optimization targeted at data-intensive workflows that are described in control-flow-oriented languages. We show how to improve efficiency of such workflows by introducing various techniques that partition the local data processing tasks to be performed during workflow execution in an improved way. These data processing tasks are either assigned to the workflow engine or to the tightly integrated local database engine. We evaluate the effectiveness of these techniques by means of various test scenarios.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2011-14&engl=0}
}
@inbook {INBOOK-2021-01,
   author = {Dimitri Petrik and Mathias Mormul and Peter Reimann and Christoph Gr{\"o}ger},
   title = {{Anforderungen f{\"u}r Zeitreihendatenbanken im industriellen IoT}},
   series = {IoT -- Best Practices},
   publisher = {Springer-Verlag},
   pages = {339--377},
   type = {Beitrag in Buch},
   month = {Mai},
   year = {2021},
   keywords = {Zeitreihendaten; Zeitreihendatenbanken; Industrial IoT; Edge Computing; Data Lake; InfluxDB},
   language = {Deutsch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Das industrielle Internet der Dinge (IIoT) integriert Informations- und Kommunikationstechnologien in industrielle Prozesse und erweitert sie durch Echtzeit-Datenanalyse. Hierbei sind sensorbasierte Zeitreihen ein wesentlicher Typ von Daten, die in der industriellen Fertigung generiert werden. Sensorbasierte Zeitreihendaten werden in regelm{\"a}{\ss}igen Abst{\"a}nden generiert und enthalten zus{\"a}tzlich zum Sensorwert einen Zeitstempel. Spezielle Zeitreihen-Datenbanken (eng.: Time Series Databases (TSDB)) sind daf{\"u}r ausgelegt, Zeitreihendaten effizient zu speichern. Wenn TSDBs maschinennah, d. h. in der industriellen Edge, eingesetzt werden, sind Maschinendaten zur {\"U}berwachung zeitkritischer Prozesse aufgrund der niedrigen Latenz schnell verf{\"u}gbar, was die erforderliche Zeit f{\"u}r die Datenverarbeitung reduziert. Andererseits k{\"o}nnen TSDBs auch in den Data Lakes als skalierbaren Datenplattformen zur Speicherung und Analyse von Rohdaten zum Einsatz kommen, um die langfristige Vorhaltung von Zeitreihendaten zu erm{\"o}glichen. Bisherige Untersuchungen zu TSDBs sind bei der Auswahl f{\"u}r den Einsatz in der industriellen Edge und im Data Lake nicht vorhanden. Die meisten verf{\"u}gbaren Benchmarks von TSDBs sind performanceorientiert und ber{\"u}cksichtigen nicht die Randbedingungen einer industriellen Edge oder eines Data Lake. Wir adressieren diese L{\"u}cke und identifizieren funktionale Kriterien f{\"u}r den Einsatz von TSDBs in diesen beiden Umgebungen und bilden somit einen qualitativen Kriterienkatalog. Des Weiteren zeigen wir am Beispiel von InfluxDB, wie dieser Katalog verwendet werden kann, mit dem Ziel die systematische Auswahl einer passenden TSDB f{\"u}r den Einsatz in der Edge und im Data Lake zu unterst{\"u}tzen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2021-01&engl=0}
}