@inproceedings {INPROC-2024-05,
   author = {Jan Schneider and Christoph Gr{\"o}ger and Arnold Lutsch},
   title = {{The Data Platform Evolution: From Data Warehouses over Data Lakes to Lakehouses}},
   booktitle = {Proceedings of the 34th GI-Workshop on Foundations of Databases (Grundlagen von Datenbanken), Hirsau, Germany},
   editor = {Holger Schwarz},
   publisher = {CEUR Workshop Proceedings},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {CEUR Workshop Proceedings},
   volume = {3714},
   pages = {67--71},
   type = {Workshop-Beitrag},
   month = {Juli},
   year = {2024},
   issn = {1613-0073},
   keywords = {Lakehouse; Data Warehouse; Data Lake; Data Management; Data Analytics},
   language = {Englisch},
   cr-category = {H.3.4 Information Storage and Retrieval Systems and Software,     H.4.2 Information Systems Applications Types of Systems},
   ee = {https://ceur-ws.org/Vol-3714/invited2.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   internal-note = {NOTE(review): ee filename (invited2.pdf) suggests this is an invited paper -- TODO confirm. Metadata is internally consistent (CEUR-WS Vol-3714, ISSN 1613-0073).},
   abstract = {The continuously increasing availability of data and the growing maturity of data-driven analysis techniques have encouraged enterprises to collect and analyze huge amounts of business-relevant data in order to exploit it for competitive advantages. To facilitate these processes, various platforms for analytical data management have been developed: While data warehouses have traditionally been used by business analysts for reporting and OLAP, data lakes emerged as an alternative concept that also supports advanced analytics. As these two common types of data platforms show rather contrary characteristics and target different user groups and analytical approaches, enterprises usually need to employ both of them, resulting in complex, error-prone and cost-expensive architectures. To address these issues, efforts have recently become apparent to combine features of data warehouses and data lakes into so-called lakehouses, which pursue to serve all kinds of analytics from a single data platform. This paper provides an overview on the evolution of analytical data platforms from data warehouses over data lakes to lakehouses and elaborates on the vision and characteristics of the latter. Furthermore, it addresses the question of what aspects common data lakes are currently missing that prevent them from transitioning to lakehouses.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2024-05&engl=0}
}
@inproceedings {INPROC-2024-04,
   author = {Jan Schneider and Arnold Lutsch and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{First Experiences on the Application of Lakehouses in Industrial Practice}},
   booktitle = {Proceedings of the 35th GI-Workshop on Foundations of Databases (Grundlagen von Datenbanken), Herdecke, Germany},
   editor = {Uta St{\"o}rl},
   publisher = {CEUR Workshop Proceedings},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {CEUR Workshop Proceedings},
   volume = {3710},
   pages = {3--8},
   type = {Workshop-Beitrag},
   month = {Juni},
   year = {2024},
   issn = {1613-0073},
   keywords = {Data Lakehouse; Data Platform; Platform Architecture; Data Analytics; Case Study; Industry Experience},
   language = {Englisch},
   cr-category = {H.3.4 Information Storage and Retrieval Systems and Software,     H.4.2 Information Systems Applications Types of Systems},
   ee = {https://ceur-ws.org/Vol-3710/paper1.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In recent years, so-called lakehouses have emerged as a new type of data platform that intends to combine characteristics of data warehouses and data lakes. Although companies started to employ the associated concepts and technologies as part of their analytics architectures, little is known about their practical medium- and long-term experiences as well as proven architectural decisions. Additionally, there is only limited knowledge about how lakehouses can be utilized effectively in an industrial context. Hence, it remains unclear under which circumstances lakehouses represent a viable alternative to conventional data platforms. To address this gap, we conducted a case study on a real-world industrial case, in which manufacturing data needs to be managed and analytically exploited. Within the scope of this case, a dedicated analytics department has been testing and leveraging a lakehouse approach for several months in a productive environment with high data volumes and various types of analytical workloads. The paper at hand presents the results of our within-case analyses and focuses on the industrial setting of the case as well as the architecture of the utilized lakehouse. This way, it provides preliminary insights on the application of lakehouses in industrial practice and refers to useful architectural decisions.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2024-04&engl=0}
}
@inproceedings {INPROC-2023-06,
   author = {Jan Schneider and Christoph Gr{\"o}ger and Arnold Lutsch and Holger Schwarz and Bernhard Mitschang},
   title = {{Assessing the Lakehouse: Analysis, Requirements and Definition}},
   booktitle = {Proceedings of the 25th International Conference on Enterprise Information Systems, ICEIS 2023, Volume 1, Prague, Czech Republic, April 24-26, 2023},
   editor = {Joaquim Filipe and Michal Smialek and Alexander Brodsky and Slimane Hammoudi},
   address = {Prague},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {44--56},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2023},
   isbn = {978-989-758-648-4},
   issn = {2184-4992},
   doi = {10.5220/0011840500003467},
   keywords = {Lakehouse; Data Warehouse; Data Lake; Data Management; Data Analytics},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems,     H.2.7 Database Administration,     H.2.8 Database Applications},
   ee = {https://www.scitepress.org/PublicationsDetail.aspx?ID=9ydI3Lyl2Fk=,     https://doi.org/10.5220/0011840500003467},
   contact = {jan.schneider@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The digital transformation opens new opportunities for enterprises to optimize their business processes by applying data-driven analysis techniques. For storing and organizing the required huge amounts of data, different types of data platforms have been employed in the past, with data warehouses and data lakes being the most prominent ones. Since they possess rather contrary characteristics and address different types of analytics, companies typically utilize both of them, leading to complex architectures with replicated data and slow analytical processes. To counter these issues, vendors have recently been making efforts to break the boundaries and to combine features of both worlds into integrated data platforms. Such systems are commonly called lakehouses and promise to simplify enterprise analytics architectures by serving all kinds of analytical workloads from a single platform. However, it remains unclear how lakehouses can be characterized, since existing definitions focus almost arbitrarily on individual architectural or functional aspects and are often driven by marketing. In this paper, we assess prevalent definitions for lakehouses and finally propose a new definition, from which several technical requirements for lakehouses are derived. We apply these requirements to several popular data management tools, such as Delta Lake, Snowflake and Dremio in order to evaluate whether they enable the construction of lakehouses.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-06&engl=0}
}
@inproceedings {INPROC-2022-08,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
   title = {{Establishing the Enterprise Data Marketplace: Characteristics, Architecture, and Challenges}},
   booktitle = {Proceedings of the Workshop on Data Science for Data Marketplaces in Conjunction with the 48th International Conference on Very Large Data Bases},
   editor = {Xiaohui Yu and Jian Pei},
   publisher = {-},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--12},
   type = {Workshop-Beitrag},
   month = {September},
   year = {2022},
   language = {Englisch},
   cr-category = {E.m Data Miscellaneous,     H.3.7 Digital Libraries,     H.4.m Information Systems Applications Miscellaneous},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   internal-note = {NOTE(review): publisher field holds a placeholder value -- TODO confirm the actual publisher of this VLDB 2022 workshop's proceedings. Entry also lacks doi/ee -- add if available.},
   abstract = {Companies today have increasing amounts of data at their disposal, most of which is not used, leaving the data value unexploited. In order to leverage the data value, the data must be democratized, i.e., made available to the company employees. In this context, the use of enterprise data marketplaces, platforms for trading data within a company, are proposed. However, specifics of enterprise data marketplaces and how these can be implemented have not been investigated in literature so far. To shed light on these topics, we illustrate the characteristics of an enterprise data marketplace and highlight according marketplace requirements. We provide an enterprise data marketplace architecture, discuss how it integrates into a company's system landscape and present an enterprise data marketplace prototype. Finally, we examine organizational and technical challenges which arise when operating a marketplace in the enterprise context. In this paper, we thereby present the enterprise data marketplace as a distinct marketplace type and provide the basis for establishing it within a company.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-08&engl=0}
}
@inproceedings {INPROC-2022-05,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Shopping -- How an Enterprise Data Marketplace Supports Data Democratization in Companies}},
   booktitle = {Proceedings of the 34th International Conference on Intelligent Information Systems},
   editor = {Jochen De Weerdt and Artem Polyvyanyy},
   address = {Stuttgart},
   publisher = {Springer International Publishing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Business Information Processing},
   pages = {19--26},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2022},
   doi = {10.1007/978-3-031-07481-3_3},
   keywords = {Data Marketplace; Data Sharing; Data Democratization},
   language = {Englisch},
   cr-category = {H.0 Information Systems General},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {To exploit the company's data value, employees must be able to find, understand and access it. The process of making corporate data available to the majority of the company's employees is referred to as data democratization. In this work, we present the current state and challenges of data democratization in companies, derived from a comprehensive literature study and expert interviews we conducted with a manufacturer. In this context a data consumer's journey is presented that reflects the required steps, tool types and roles for finding, understanding and accessing data in addition to revealing three data democratization challenges. To address these challenges we propose the use of an enterprise data marketplace, a novel type of information system for sharing data within the company. We developed a prototype based on which a suitability assessment of a data marketplace yields an improved consumer journey and demonstrates that the marketplace addresses the data democratization challenges and consequently, shows that the marketplace is suited for realizing data democratization.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-05&engl=0}
}
@inproceedings {INPROC-2021-09,
   author = {Eduard Wagner and Bernd Keller and Peter Reimann and Christoph Gr{\"o}ger and Dieter Spath},
   title = {{Advanced Analytics for Evaluating Critical Joining Technologies in Automotive Body Structures and Body Shops}},
   booktitle = {Proceedings of the 15th CIRP Conference on Intelligent Computation in Manufacturing Engineering (CIRP ICME)},
   publisher = {Elsevier},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2021},
   keywords = {Body Shop; Data Analytics; Data Mining; Advanced Analytics; Machine Learning},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The product development process within the automotive industry is subject to changing demands due to internal and external influences. These influences and adjustments especially affect the car body and its inherent joining technology, as critical stages of variant creation. However, current literature does not offer a suitable analytical method to identify and assess these critical influences. We propose an advanced analytics approach that combines data mining and machine learning techniques within the car body substructure. The evaluation within the Mercedes-Benz AG shows that our approach facilitates a quantitative assessment of unknown interdependencies between car body modules and corresponding joining techniques.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-09&engl=0}
}
@inproceedings {INPROC-2021-08,
   author = {Alexander Birk and Yannick Wilhelm and Simon Dreher and Christian Flack and Peter Reimann and Christoph Gr{\"o}ger},
   title = {{A Real-World Application of Process Mining for Data-Driven Analysis of Multi-Level Interlinked Manufacturing Processes}},
   booktitle = {Procedia CIRP: Proceedings of the 54th CIRP Conference on Manufacturing Systems (CIRP CMS 2021)},
   publisher = {Elsevier},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2021},
   keywords = {Process Mining; Multi-level Interlinked Manufacturing Process; Heterogeneous Data Sources; Data Integration},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Process Mining (PM) has huge potential for manufacturing process analysis. However, there is little research on practical applications. We investigate a real-world manufacturing process of pneumatic valves. The manufacturing process comprises interlinked events at the superordinate business process level and at the subordinate machine level, making its analysis based on PM challenging. We show how to integrate heterogeneous data sources and give examples how PM enables a deeper understanding of the manufacturing process, thereby helping to uncover optimization potentials. Furthermore, we discuss challenges in data integration and point out limitations of current PM techniques in manufacturing.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-08&engl=0}
}
@inproceedings {INPROC-2021-06,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
   title = {{Enterprise-Wide Metadata Management - An Industry Case on the Current State and Challenges}},
   booktitle = {24th International Conference on Business Information Systems},
   editor = {Witold Abramowicz and S{\"o}ren Auer and El{\.z}bieta Lewa{\'n}ska},
   publisher = {TIB Open Publishing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {269--279},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2021},
   doi = {10.52825/bis.v1i.47},
   language = {Englisch},
   cr-category = {A.0 General Literature, General},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Metadata management is a crucial success factor for companies today, as for example, it enables exploiting data value fully or enables legal compliance. With the emergence of new concepts, such as the data lake, and new objectives, such as the enterprise-wide sharing of data, metadata management has evolved and now poses a renewed challenge for companies. In this context, we interviewed a globally active manufacturer to reveal how metadata management is implemented in practice today and what challenges companies are faced with and whether these constitute research gaps. As an outcome, we present the company's metadata management goals and their corresponding solution approaches and challenges. An evaluation of the challenges through a literature and tool review yields three research gaps, which are concerned with the topics: (1) metadata management for data lakes, (2) categorizations and compositions of metadata management tools for comprehensive metadata management, and (3) the use of data marketplaces as metadata-driven exchange platforms within an enterprise. The gaps lay the groundwork for further research activities in the field of metadata management and the industry case represents a starting point for research to realign with real-world industry needs.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-06&engl=0}
}
@inproceedings {INPROC-2021-05,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{The Data Lake Architecture Framework}},
   booktitle = {Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2021), 19. Fachtagung des GI-Fachbereichs Datenbanken und Informationssysteme (DBIS), 13.-17. September 2021, Dresden, Germany},
   publisher = {Gesellschaft f{\"u}r Informatik},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {351--370},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2021},
   doi = {10.18420/btw2021-19},
   language = {Englisch},
   cr-category = {H.4 Information Systems Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {During recent years, data lakes emerged as a way to manage large amounts of heterogeneous data for modern data analytics. Although various work on individual aspects of data lakes exists, there is no comprehensive data lake architecture yet. Concepts that describe themselves as a ``data lake architecture'' are only partial. In this work, we introduce the data lake architecture framework. It supports the definition of data lake architectures by defining nine architectural aspects, i.e., perspectives on a data lake, such as data storage or data modeling, and by exploring the interdependencies between these aspects. The included methodology helps to choose appropriate concepts to instantiate each aspect. To evaluate the framework, we use it to configure an exemplary data lake architecture for a real-world data lake implementation. This final assessment shows that our framework provides comprehensive guidance in the configuration of a data lake architecture.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-05&engl=0}
}
@inproceedings {INPROC-2020-57,
   author = {Simon Dreher and Peter Reimann and Christoph Gr{\"o}ger},
   title = {{Application Fields and Research Gaps of Process Mining in Manufacturing Companies}},
   booktitle = {Proceedings of INFORMATIK 2020},
   editor = {R. H. Reussner and A. Koziolek and R. Heinrich},
   publisher = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {621--634},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2020},
   keywords = {Process Mining; Application; Production; Manufacturing; SCOR; Literature Review},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {To survive in global competition with increasing cost pressure, manufacturing companies must continuously optimize their manufacturing-related processes. Thereby, process mining constitutes an important data-driven approach to gain a profound understanding of the actual processes and to identify optimization potentials by applying data mining and machine learning techniques on event data. However, there is little knowledge about the feasibility and usefulness of process mining specifically in manufacturing companies. Hence, this paper provides an overview of potential applications of process mining for the analysis of manufacturing-related processes. We conduct a systematic literature review, classify relevant articles according to the Supply-Chain-Operations-Reference-Model (SCOR-model), identify research gaps, such as domain-specific challenges regarding unstructured, cascaded and non-linear processes or heterogeneous data sources, and give practitioners inspiration which manufacturing-related processes can be analyzed by process mining techniques.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-57&engl=0}
}
@inproceedings {INPROC-2020-55,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
   title = {{A Zone Reference Model for Enterprise-Grade Data Lake Management}},
   booktitle = {Proceedings of the 24th IEEE International Enterprise Distributed Object Computing Conference (EDOC 2020)},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {57--66},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2020},
   keywords = {Data Lake; Zones; Reference Model; Industry Case; Industry Experience},
   language = {Englisch},
   cr-category = {H.4 Information Systems Applications},
   contact = {Senden Sie eine E-Mail an corinna.giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data lakes are on the rise as data platforms for any kind of analytics, from data exploration to machine learning. They achieve the required flexibility by storing heterogeneous data in their raw format, and by avoiding the need for pre-defined use cases. However, storing only raw data is inefficient, as for many applications, the same data processing has to be applied repeatedly. To foster the reuse of processing steps, literature proposes to store data in different degrees of processing in addition to their raw format. To this end, data lakes are typically structured in zones. There exists various zone models, but they are varied, vague, and no assessments are given. It is unclear which of these zone models is applicable in a practical data lake implementation in enterprises. In this work, we assess existing zone models using requirements derived from multiple representative data analytics use cases of a real-world industry case. We identify the shortcomings of existing work and develop a zone reference model for enterprise-grade data lake management in a detailed manner. We assess the reference model's applicability through a prototypical implementation for a real-world enterprise data lake use case. This assessment shows that the zone reference model meets the requirements relevant in practice and is ready for industry use.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-55&engl=0}
}
@inproceedings {INPROC-2020-50,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{HANDLE - A Generic Metadata Model for Data Lakes}},
   booktitle = {Big Data Analytics and Knowledge Discovery: 22nd International Conference, DaWaK 2020, Bratislava, Slovakia, September 14--17, 2020, Proceedings},
   publisher = {Springer, Cham},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {73--88},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2020},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   internal-note = {NOTE(review): appears to duplicate INPROC-2020-45 (same title, authors, pages, venue); consider consolidating into the more complete INPROC-2020-45 record.},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The substantial increase in generated data induced the development of new concepts such as the data lake. A data lake is a large storage repository designed to enable flexible extraction of the data's value. A key aspect of exploiting data value in data lakes is the collection and management of metadata. To store and handle the metadata, a generic metadata model is required that can reflect metadata of any potential metadata management use case, e.g., data versioning or data lineage. However, an evaluation of existent metadata models yields that none so far are sufficiently generic. In this work, we present HANDLE, a generic metadata model for data lakes, which supports the flexible integration of metadata, data lake zones, metadata on various granular levels, and any metadata categorization. With these capabilities HANDLE enables comprehensive metadata management in data lakes. We show HANDLE's feasibility through the application to an exemplary access-use-case and a prototypical implementation. A comparison with existent models yields that HANDLE can reflect the same information and provides additional capabilities needed for metadata management in data lakes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-50&engl=0}
}
@inproceedings {INPROC-2020-45,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{HANDLE - A Generic Metadata Model for Data Lakes}},
   booktitle = {Big Data Analytics and Knowledge Discovery},
   editor = {Min Song and Il-Yeol Song and Gabriele Kotsis and A Min Tjoa and Ismail Khalil},
   publisher = {Springer Nature Switzerland AG},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Computer Science},
   volume = {12393},
   pages = {73--88},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2020},
   doi = {10.1007/978-3-030-59065-9_7},
   keywords = {Metadata management; Metadata model; Data lake},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2020-45/INPROC-2020-45.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The substantial increase in generated data induced the development of new concepts such as the data lake. A data lake is a large storage repository designed to enable flexible extraction of the data's value. A key aspect of exploiting data value in data lakes is the collection and management of metadata. To store and handle the metadata, a generic metadata model is required that can reflect metadata of any potential metadata management use case, e.g., data versioning or data lineage. However, an evaluation of existent metadata models yields that none so far are sufficiently generic. In this work, we present HANDLE, a generic metadata model for data lakes, which supports the flexible integration of metadata, data lake zones, metadata on various granular levels, and any metadata categorization. With these capabilities HANDLE enables comprehensive metadata management in data lakes. We show HANDLE's feasibility through the application to an exemplary access-use-case and a prototypical implementation. A comparison with existent models yields that HANDLE can reflect the same information and provides additional capabilities needed for metadata management in data lakes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-45&engl=0}
}
@inproceedings {INPROC-2019-15,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz},
   title = {{Modeling Data Lakes with Data Vault: Practical Experiences, Assessment, and Lessons Learned}},
   booktitle = {Proceedings of the 38th Conference on Conceptual Modeling (ER 2019)},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--14},
   type = {Konferenz-Beitrag},
   month = {November},
   year = {2019},
   keywords = {Data Lakes; Data Vault; Data Modeling; Industry Experience; Assessment; Lessons Learned},
   language = {Englisch},
   cr-category = {H.2.1 Database Management Logical Design},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data lakes have become popular to enable organization-wide analytics on heterogeneous data from multiple sources. Data lakes store data in their raw format and are often characterized as schema-free. Nevertheless, it turned out that data still need to be modeled, as neglecting data modeling may lead to issues concerning e.g., quality and integration. In current research literature and industry practice, Data Vault is a popular modeling technique for structured data in data lakes. It promises a flexible, extensible data model that preserves data in their raw format. However, hardly any research or assessment exist on the practical usage of Data Vault for modeling data lakes. In this paper, we assess the Data Vault model's suitability for the data lake context, present lessons learned, and investigate success factors for the use of Data Vault. Our discussion is based on the practical usage of Data Vault in a large, global manufacturer's data lake and the insights gained in real-world analytics projects.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-15&engl=0}
}
@inproceedings {INPROC-2019-14,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz},
   title = {{Leveraging the Data Lake - Current State and Challenges}},
   booktitle = {Proceedings of the 21st International Conference on Big Data Analytics and Knowledge Discovery (DaWaK'19)},
   publisher = {Springer Nature},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--10},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2019},
   keywords = {Data Lakes; State of the Art; Challenges},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems,     H.2.8 Database Applications},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The digital transformation leads to massive amounts of heterogeneous data challenging traditional data warehouse solutions in enterprises. In order to exploit these complex data for competitive advantages, the data lake recently emerged as a concept for more flexible and powerful data analytics. However, existing literature on data lakes is rather vague and incomplete, and the various realization approaches that have been proposed neither cover all aspects of data lakes nor do they provide a comprehensive design and realization strategy. Hence, enterprises face multiple challenges when building data lakes. To address these shortcomings, we investigate existing data lake literature and discuss various design and realization aspects for data lakes, such as governance or data models. Based on these insights, we identify challenges and research gaps concerning (1) data lake architecture, (2) data lake governance, and (3) a comprehensive strategy to realize data lakes. These challenges still need to be addressed to successfully leverage the data lake in practice.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-14&engl=0}
}
@inproceedings {INPROC-2017-04,
   author = {Matthias Wieland and Pascal Hirmer and Frank Steimle and Christoph Gr{\"o}ger and Bernhard Mitschang and Eike Rehder and Dominik Lucke and Omar Abdul Rahman and Thomas Bauernhansl},
   title = {{Towards a Rule-Based Manufacturing Integration Assistant}},
   booktitle = {Proceedings of the 49th CIRP Conference on Manufacturing Systems (CIRP-CMS 2016); Stuttgart, Germany, May 25-27, 2016},
   editor = {Engelbert Westk{\"a}mper and Thomas Bauernhansl},
   publisher = {Elsevier},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Procedia CIRP},
   volume = {57},
   pages = {213--218},
   type = {Konferenz-Beitrag},
   month = {Januar},
   year = {2017},
   doi = {10.1016/j.procir.2016.11.037},
   keywords = {Rules; Integration; Manufacturing; Smart-Factory; Industrie 4.0},
   language = {Englisch},
   cr-category = {H.4.0 Information Systems Applications General,     J.2 Physical Sciences and Engineering,     I.2.1 Applications and Expert Systems,     I.2.4 Knowledge Representation Formalisms and Methods},
   ee = {http://www.sciencedirect.com/science/article/pii/S221282711631191X},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Recent developments and steadily declining prices in ICT enable an economic application of advanced digital tools in wide areas of manufacturing. Solutions based on concepts and technologies of the Internet of Things or Cyber Physical Systems can be used to implement monitoring as well as self-organization of production, maintenance or logistics processes. However, integration of new digital tools in existing heterogeneous manufacturing IT systems and integration of machines and devices into manufacturing environments is an expensive and tedious task. Therefore, integration issues on IT and manufacturing level significantly prevent agile manufacturing. Especially small and medium-sized enterprises do not have the expertise or the investment possibilities to realize such an integration. To tackle this issue, we present the approach of the Manufacturing Integration Assistant - MIALinx. The objective is to develop and implement a lightweight and easy-to-use integration solution for small and medium-sized enterprises based on recent web automation technologies. MIALinx aims to simplify the integration using simple programmable, flexible and reusable IF-THEN rules that connect occurring situations in manufacturing, such as a machine break down, with corresponding actions, e.g., an automatic maintenance order generation. For this purpose, MIALinx connects sensors and actuators based on defined rules whereas the rule set is defined in a domain-specific, easy-to-use manner to enable rule modeling by domain experts. Through the definition of rule sets, the workers' knowledge can be also externalized. Using manufacturing-approved cloud computing technologies, we enable robustness, security, and a low-effort, low-cost integration of MIALinx into existing manufacturing environments to provide advanced digital tools also for small and medium-sized enterprises.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2017-04&engl=0}
}
@inproceedings {INPROC-2016-54,
   author = {Henri Tokola and Christoph Gr{\"o}ger and Eeva J{\"a}rvenp{\"a}{\"a} and Esko Niemi},
   title = {{Designing Manufacturing Dashboards on the Basis of a Key Performance Indicator Survey}},
   booktitle = {Proceedings of the 49th CIRP Conference on Manufacturing Systems (CIRP CMS)},
   publisher = {Elsevier},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Procedia CIRP},
   volume = {57},
   pages = {619--624},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2016},
   keywords = {Dashboards; Key Performance Indicators (KPIs); Scorecard},
   language = {Englisch},
   cr-category = {J.1 Administration Data Processing},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Target-oriented and real-time information provisioning across all hierarchy levels, from shop floor to top floor, is an important success factor for manufacturing companies to facilitate agile and efficient manufacturing. In general, dashboards -- in terms of digital single-screen displays -- address this challenge and support intuitive monitoring and visualisation of business performance information. Yet, existing dashboard research mainly focuses on IT issues and lacks a systematic study of the dashboard content. To address this gap, in this paper, we design three representative dashboards for manufacturing companies based on a comprehensive survey that focuses on suitable key performance indicators for different manufacturing target groups. The paper consists of three parts. First, the paper provides a literature review about design principles of dashboards. Second, it publishes the results of a survey of manufacturing companies on preferred key performance indicators (KPIs) for dashboards and the use of dashboards. Third, using the results obtained from the survey, three representative manufacturing dashboards are designed: an operational dashboard for workers, a tactical dashboard for managers and a strategy dashboard for executives. The results underline that different KPIs are preferred for dashboards on different hierarchy levels and that mobile usage of dashboards, especially on tablet PCs, is favoured.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2016-54&engl=0}
}
@inproceedings {INPROC-2016-07,
   author = {Christoph Gr{\"o}ger and Laura Kassner and Eva Hoos and Jan K{\"o}nigsberger and Cornelia Kiefer and Stefan Silcher and Bernhard Mitschang},
   title = {{The Data-Driven Factory. Leveraging Big Industrial Data for Agile, Learning and Human-Centric Manufacturing}},
   booktitle = {Proceedings of the 18th International Conference on Enterprise Information Systems},
   editor = {Slimane Hammoudi and Leszek Maciaszek and Michele M. Missikoff and Olivier Camp and Jose Cordeiro},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {40--52},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2016},
   isbn = {978-989-758-187-8},
   keywords = {IT Architecture, Data Analytics, Big Data, Smart Manufacturing, Industrie 4.0},
   language = {Englisch},
   cr-category = {H.4.0 Information Systems Applications General,     J.2 Physical Sciences and Engineering},
   contact = {Email an Christoph.Groeger@ipvs.uni-stuttgart.de oder laura.kassner@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Global competition in the manufacturing industry is characterized by ever shorter product life cycles, increasing complexity and a turbulent environment. High product quality, continuously improved processes as well as changeable organizational structures constitute central success factors for manufacturing companies. With the rise of the internet of things and Industrie 4.0, the increasing use of cyber-physical systems as well as the digitalization of manufacturing operations lead to massive amounts of heterogeneous industrial data across the product life cycle. In order to leverage these big industrial data for competitive advantages, we present the concept of the data-driven factory. The data-driven factory enables agile, learning and human-centric manufacturing and makes use of a novel IT architecture, the Stuttgart IT Architecture for Manufacturing (SITAM), overcoming the insufficiencies of the traditional information pyramid of manufacturing. We introduce the SITAM architecture and discuss its conceptual components with respect to service-oriented integration, advanced analytics and mobile information provisioning in manufacturing. Moreover, for evaluation purposes, we present a prototypical implementation of the SITAM architecture as well as a real-world application scenario from the automotive industry to demonstrate the benefits of the data-driven factory.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2016-07&engl=0}
}
@inproceedings {INPROC-2014-64,
   author = {Eva Hoos and Christoph Gr{\"o}ger and Bernhard Mitschang},
   title = {{Mobile Apps in Engineering: A Process-Driven Analysis of Business Potentials and Technical Challenges}},
   booktitle = {Proceedings of the 9th CIRP Conference on Intelligent Computation in Manufacturing Engineering (CIRP ICME), 23-25 July, 2014, Capri (Naples), Italy},
   publisher = {CIRP},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2014},
   language = {Englisch},
   cr-category = {H.4.0 Information Systems Applications General,     J.4 Social and Behavioral Sciences,     J.2 Physical Sciences and Engineering},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Mobile apps on smartphones and tablet PCs are more and more employed in enterprises to optimize business processes, e.g. by elimination of paper-based data collection. With respect to engineering, mobile apps provide a huge potential for increased flexibility and efficiency due to their anywhere and anytime characteristics, e.g., for product testing in the field. However, not every usage of mobile apps is beneficial from a business point of view and existing apps for engineering represent only rudimentary front-ends for stationary IT systems without an app-oriented redesign. Hence, there are three core challenges to leverage the potential of mobile apps in engineering: (1) identifying value-added app usage scenarios from a process point of view, (2) realizing a task-oriented and context-aware user interface design and (3) mastering technical obstacles at the app implementation. In this paper, we address these challenges by a case-oriented analysis of selected engineering processes in the automotive industry in order to identify engineering tasks suited for the usage of mobile apps. On this basis, we design corresponding engineering apps and analyze their business potentials. Moreover, we derive common technological challenges for the development of engineering apps, e.g. data synchronization aspects, and highlight further research issues.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-64&engl=0}
}
@inproceedings {INPROC-2014-59,
   author = {Laura Kassner and Christoph Gr{\"o}ger and Bernhard Mitschang and Engelbert Westk{\"a}mper},
   title = {{Product Life Cycle Analytics - Next Generation Data Analytics on Structured and Unstructured Data}},
   booktitle = {Proceedings of the 9th CIRP Conference on Intelligent Computation in Manufacturing Engineering - CIRP ICME '14},
   address = {Naples},
   publisher = {Elsevier},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--6},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2014},
   keywords = {analytics, big data, unstructured data, text analytics, product life cycle management, PLM, data warehousing, product life cycle analytics, data integration},
   language = {Englisch},
   cr-category = {H.3.1 Content Analysis and Indexing,     H.3.4 Information Storage and Retrieval Systems and Software,     J.2 Physical Sciences and Engineering,     J.6 Computer-Aided Engineering},
   contact = {Per Mail an laura.kassner@gsame.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Enormous amounts of unstructured data, e. g., emails, failure reports and customer complaints, are abundant around the product life cycle and provide a huge potential for analytics-driven optimization. However, existing analytics approaches on unstructured data are fraught with three major insufficiencies limiting comprehensive business improvement: (1) they focus on isolated data sources from a single life cycle phase -- for example, data from a customer relationship management system are mined for frequent complaints without considering manufacturing failure reports related to the same product; (2) they do not make use of structured data for holistic analytics, e. g., to automatically correlate unstructured failure reports with structured performance data of a manufacturing execution system; (3) existing implementations of data integration and analytics components are typically cost-intensive, manual and case-based, without a general framework. To address these issues, we present our Product Life Cycle Analytics (PLCA) approach, a platform and a reference architecture for the holistic integration and analysis of unstructured and structured data from multiple data sources around the product life cycle. For this purpose, we survey structured and unstructured data sources around the product life cycle and discuss limitations of existing analytics approaches like traditional Business Intelligence applications. Moreover, we develop use cases for holistic life-cycle-oriented analytics and give examples based on case study investigations, e. g., for the holistic analysis of unstructured failure reports in the automotive industry. On this basis, we discuss technical requirements and components of our reference architecture, such as a versatile, plug-and-play Natural Language Processing pipeline and mechanisms for linking structured and unstructured data in a holistic data warehouse. 
Finally, we analyse implementation issues and investigate underlying technologies from the areas of text analytics and data mining in order to evaluate our architecture with respect to the identified use cases.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-59&engl=0}
}
@inproceedings {INPROC-2014-49,
   author = {Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{The Deep Data Warehouse. Link-based Integration and Enrichment of Warehouse Data and Unstructured Content}},
   booktitle = {Proceedings of the 18th IEEE International Enterprise Distributed Object Computing Conference (EDOC), 01-05 September, 2014, Ulm, Germany},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2014},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data warehouses are at the core of enterprise IT and enable the efficient storage and analysis of structured data. Besides, unstructured content, e.g., emails and documents, constitutes more than half of the entire enterprise data and contains a lot of implicit knowledge about warehouse entities. Thus, holistic analytics require the integration of structured warehouse data and unstructured content to generate novel insights. These insights can also be used to enrich the integrated data and to create a new basis for further analytics. Existing integration approaches only support a limited range of analytical applications and require the costly adaptation of the warehouse schema. In this paper, we present the Deep Data Warehouse (DeepDWH), a novel type of data warehouse based on the flexible integration and enrichment of warehouse data and unstructured content, addressing the variety challenge of Big Data. It relies on information-rich instance-level links between warehouse elements and content items, which are represented in a graph-oriented structure. Neither adaptations of the existing warehouse nor the design of an overall federated schema are required. We design a conceptual linking model and develop a logical schema for links based on a property graph. As a proof of concept, we present a prototypical implementation of the DeepDWH including a link store based on a graph database.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-49&engl=0}
}
@inproceedings {INPROC-2014-28,
   author = {Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{Prescriptive Analytics for Recommendation-based Business Process Optimization}},
   booktitle = {Proceedings of the 17th International Conference on Business Information Systems (BIS), 22-23 May, 2014, Larnaca, Cyprus},
   editor = {Witold Abramowicz and Angelika Kokkinaki},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Business Information Processing},
   volume = {176},
   pages = {25--37},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2014},
   keywords = {Prescriptive Analytics, Process Optimization, Process Warehouse, Data Mining, Business Intelligence, Decision Support},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Continuously improved business processes are a central success factor for companies. Yet, existing data analytics do not fully exploit the data generated during process execution. Particularly, they miss prescriptive techniques to transform analysis results into improvement actions. In this paper, we present the data-mining-driven concept of recommendation-based business process optimization on top of a holistic process warehouse. It prescriptively generates action recommendations during process execution to avoid a predicted metric deviation. We discuss data mining techniques and data structures for real-time prediction and recommendation generation and present a proof of concept based on a prototypical implementation in manufacturing.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-28&engl=0}
}
@inproceedings {INPROC-2014-14,
   author = {Eva Hoos and Christoph Gr{\"o}ger and Stefan Kramer and Bernhard Mitschang},
   title = {{Improving Business Processes through Mobile Apps - An Analysis Framework to Identify Value-added App Usage Scenarios}},
   booktitle = {Proceedings of the 16th International Conference on Enterprise Information Systems (ICEIS), 27-30 April, 2014, Lisbon, Portugal},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2014},
   keywords = {Business Processes; Analysis Framework; Mobile Application},
   language = {Englisch},
   cr-category = {H.1.1 Systems and Information Theory,     K.6.1 Project and People Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Mobile apps offer new possibilities to improve business processes. However, the introduction of mobile apps is typically carried out from a technology point of view. Hence, process improvement from a business point of view is not guaranteed. There is a methodological lack for a holistic analysis of business processes regarding mobile technology. For this purpose, we present an analysis framework, which comprises a systematic methodology to identify value-added usage scenarios of mobile technology in business processes with a special focus on mobile apps. The framework is based on multi-criteria analysis and portfolio analysis techniques and it is evaluated in a case-oriented investigation in the automotive industry.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-14&engl=0}
}
@inproceedings {INPROC-2014-10,
   author = {Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{The Manufacturing Knowledge Repository. Consolidating Knowledge to Enable Holistic Process Knowledge Management in Manufacturing}},
   booktitle = {Proceedings of the 16th International Conference on Enterprise Information Systems (ICEIS), 27-30 April, 2014, Lisbon, Portugal},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2014},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration,     J.1 Administration Data Processing},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The manufacturing industry is faced with strong competition making the companies' knowledge resources and their systematic management a critical success factor. Yet, existing concepts for the management of process knowledge in manufacturing are characterized by major shortcomings. Particularly, they are either exclusively based on structured knowledge, e. g., formal rules, or on unstructured knowledge, such as documents, and they focus on isolated aspects of manufacturing processes. To address these issues, we present the Manufacturing Knowledge Repository, a holistic repository that consolidates structured and unstructured process knowledge to facilitate knowledge management and process optimization in manufacturing. First, we define requirements, especially the types of knowledge to be handled, e. g., data mining models and text documents. Next, we develop a conceptual repository data model associating knowledge items and process components such as machines and process steps. Furthermore, we discuss implementation issues including storage architecture variants and present both an evaluation of the data model and a proof of concept based on a prototypical implementation.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-10&engl=0}
}
@inproceedings {INPROC-2014-09,
   author = {Christoph Gr{\"o}ger and Christoph Stach},
   title = {{The Mobile Manufacturing Dashboard}},
   booktitle = {Proceedings of the 2014 IEEE International Conference on Pervasive Computing and Communications Workshops (PerCom Workshops), 24-28 March, 2014, Budapest, Hungary},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Demonstration},
   month = {M{\"a}rz},
   year = {2014},
   language = {Englisch},
   cr-category = {J.1 Administration Data Processing},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Real-time monitoring and analysis of manufacturing processes are critical success factors in the smart factory. While there is a variety of data analytics tools for process optimization, almost each of these applications is designed for desktop PCs and focuses on selected process aspects, only. I. e., there is a gap between the site the analysis outcomes occur (the management level) and the site where an immediate reaction to these results is required (the factory shop floor). Even worse, there is no mobile, holistic and analytics-based information provisioning tool for workers and production supervisors on the shop floor but rudimentary systems designed for limited application areas, only. Therefore, we introduce our Mobile Manufacturing Dashboard (MMD), a situation-aware manufacturing dashboard for mobile devices. The MMD provides advanced analytics and addresses the full range of process-oriented information needs of both shop floor workers and production supervisors. In this paper, we give a brief overview of the MMD's major architecture and implementation aspects and describe two representative real-world scenarios for the MMD. These characteristic scenarios target shop floor workers and production supervisors and illustrate situation-aware information provisioning in the smart factory.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-09&engl=0}
}
@inproceedings {INPROC-2013-15,
   author = {Christoph Gr{\"o}ger and Mark Hillmann and Friedemann Hahn and Bernhard Mitschang and Engelbert Westk{\"a}mper},
   title = {{The Operational Process Dashboard for Manufacturing}},
   booktitle = {Proceedings of the 46th CIRP Conference on Manufacturing Systems (CMS2013), 29-31 May, 2013, Sesimbra, Portugal},
   publisher = {Elsevier},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2013},
   language = {Englisch},
   cr-category = {J.1 Administration Data Processing},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Agility is a critical success factor for manufacturers in today's volatile global environment and requires employees monitoring their performance and reacting quickly to turbulences. Thus, comprehensive information provisioning on all hierarchy levels is necessary. Yet, existing IT systems, e. g., Manufacturing Execution Systems, scarcely address information needs of workers on the shop floor level. This causes uncoordinated waiting times, inflexibility and costly communication. To address these issues, we present the Operational Process Dashboard for Manufacturing (OPDM), a mobile dashboard for shop floor workers. We identify process-oriented information needs, develop technical dashboard services and define IT requirements for an implementation.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2013-15&engl=0}
}
@inproceedings {INPROC-2013-14,
   author = {Christoph Gr{\"o}ger and Stefan Silcher and Engelbert Westk{\"a}mper and Bernhard Mitschang},
   title = {{Leveraging Apps in Manufacturing. A Framework for App Technology in the Enterprise}},
   booktitle = {Proceedings of the 46th CIRP Conference on Manufacturing Systems (CMS2013), 29-31 May, 2013, Sesimbra, Portugal},
   publisher = {Elsevier},
   month = {Mai},
   year = {2013},
   type = {Konferenz-Beitrag},
   language = {Englisch},
   cr-category = {J.1 Administration Data Processing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Apps on mobile devices like smartphones have become the core of the digital life of consumers. Apps are used, e. g., for shopping or communicating in social networks. Recently, apps are gaining more and more attention in enterprises as enabler for agile process optimization. In this article, we discuss the potentials and challenges of exploiting this technology with a focus on the manufacturing industry. We come up with a framework for apps in manufacturing companies and identify major areas that need further investigations to fully leverage apps. Moreover, we present existing and novel apps across the product life cycle.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2013-14&engl=0}
}
@inproceedings {INPROC-2012-31,
   author = {Christoph Gr{\"o}ger and Johannes Schlaudraff and Florian Niedermann and Bernhard Mitschang},
   title = {{Warehousing Manufacturing Data. A Holistic Process Warehouse for Advanced Manufacturing Analytics}},
   editor = {Alfredo Cuzzocrea and Umeshwar Dayal},
   booktitle = {Proceedings of the 14th International Conference on Data Warehousing and Knowledge Discovery - DaWaK 2012},
   series = {Lecture Notes in Computer Science},
   volume = {7448},
   pages = {142--155},
   publisher = {Springer},
   address = {Berlin, Heidelberg},
   month = {September},
   year = {2012},
   type = {Konferenz-Beitrag},
   keywords = {Data Warehouse; Manufacturing; Process Optimization; Analytics; Business Intelligence; Data Integration},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration,     J.1 Administration Data Processing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Strong competition in the manufacturing industry makes efficient and effective manufacturing processes a critical success factor. However, existing warehousing and analytics approaches in manufacturing are coined by substantial shortcomings, significantly preventing comprehensive process improvement. Especially, they miss a holistic data base integrating operational and process data, e. g., from Manufacturing Execution and Enterprise Resource Planning systems. To address this challenge, we introduce the Manufacturing Warehouse, a concept for a holistic manufacturing-specific process warehouse as central part of the overall Advanced Manufacturing Analytics Platform. We define a manufacturing process meta model and deduce a universal warehouse model. In addition, we develop a procedure for its instantiation and the integration of concrete source data. Finally, we describe a first proof of concept based on a prototypical implementation.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2012-31&engl=0}
}
@comment{NOTE(review): booktitle date range used a raw Unicode en dash ("4 – 6 July"); replaced with LaTeX-safe "4--6" since this file otherwise sticks to ASCII plus LaTeX escapes (classic-BibTeX convention).}
@inproceedings {INPROC-2012-15,
   author = {Christoph Gr{\"o}ger and Florian Niedermann and Bernhard Mitschang},
   title = {{Data Mining-driven Manufacturing Process Optimization}},
   booktitle = {Proceedings of the World Congress on Engineering 2012 Vol III, WCE 2012, 4--6 July, 2012, London, U.K.},
   editor = {S. I. Ao and L. Gelman and D. W. L. Hukins and A. Hunter and A. M. Korsunsky},
   publisher = {Newswood},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1475--1481},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2012},
   isbn = {978-988-19252-2-0},
   keywords = {Analytics; Data Mining; Decision Support; Process Optimization},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications,     J.1 Administration Data Processing},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {High competitive pressure in the global manufacturing industry makes efficient, effective and continuously improved manufacturing processes a critical success factor. Yet, existing analytics in manufacturing, e. g., provided by Manufacturing Execution Systems, are coined by major shortcomings considerably limiting continuous process improvement. In particular, they do not make use of data mining to identify hidden patterns in manufacturing-related data. In this article, we present indication-based and pattern-based manufacturing process optimization as novel data mining approaches provided by the Advanced Manufacturing Analytics Platform. We demonstrate their usefulness through use cases and depict suitable data mining techniques as well as implementation details.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2012-15&engl=0}
}
@comment{NOTE(review): abstract carried line-break hyphenation artifacts from a PDF copy-paste ("chal-lenge", "reposito-ry", "pat-tern-based", "architec-ture", "implementa-tion"); rejoined the words. No other field changed.}
@inproceedings {INPROC-2012-14,
   author = {Christoph Gr{\"o}ger and Florian Niedermann and Holger Schwarz and Bernhard Mitschang},
   title = {{Supporting Manufacturing Design by Analytics. Continuous Collaborative Process Improvement enabled by the Advanced Manufacturing Analytics Platform}},
   booktitle = {Proceedings of the 2012 16th IEEE International Conference on Computer Supported Cooperative Work in Design (CSCWD), May 23-25, 2012, Wuhan, China},
   editor = {Liang Gao and Weiming Shen and Jean-Paul Barth{\`e}s and Junzhou Luo and Jianming Yong and Wenfeng Li and Weidong Li},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {793--799},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2012},
   isbn = {978-1-4673-1210-3},
   keywords = {Analytics; Data Mining; Process Management; Manufacturing; Process Optimization},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications,     J.1 Administration Data Processing},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The manufacturing industry is faced with global competition making efficient, effective and continuously improved manufacturing processes a critical success factor. Yet, media discontinuities, the use of isolated analysis methods on local data sets as well as missing means for sharing analysis results cause a collaborative gap in Manufacturing Process Management that prohibits continuous process improvement. To address this challenge, this paper proposes the Advanced Manufacturing Analytics (AdMA) Platform that bridges the gap by integrating operational and process manufacturing data, defining a repository for analysis results and providing indication-based and pattern-based optimization techniques. Both the conceptual architecture underlying the platform as well as its current implementation are presented in this paper.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2012-14&engl=0}
}
@comment{ART-2024-01: journal article in SN Computer Science (2024), lakehouse state-of-the-art survey. Record looks complete for this file's conventions (DOI, ISSN, volume/number/pages all present); no changes made.}
@article {ART-2024-01,
   author = {Jan Schneider and Christoph Gr{\"o}ger and Arnold Lutsch and Holger Schwarz and Bernhard Mitschang},
   title = {{The Lakehouse: State of the Art on Concepts and Technologies}},
   journal = {SN Computer Science},
   publisher = {Springer Nature},
   volume = {5},
   number = {5},
   pages = {1--39},
   type = {Artikel in Zeitschrift},
   month = {April},
   year = {2024},
   issn = {2661-8907},
   doi = {10.1007/s42979-024-02737-0},
   keywords = {Data Lakehouse; Data Lake; Data Platform; Data Analytics},
   language = {Englisch},
   cr-category = {H.3.4 Information Storage and Retrieval Systems and Software},
   ee = {https://doi.org/10.1007/s42979-024-02737-0,     https://link.springer.com/content/pdf/10.1007/s42979-024-02737-0.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In the context of data analytics, so-called lakehouses refer to novel variants of data platforms that attempt to combine characteristics of data warehouses and data lakes. In this way, lakehouses promise to simplify enterprise analytics architectures, which often suffer from high operational costs, slow analytical processes and further shortcomings resulting from data replication. However, different views and notions on the lakehouse paradigm exist, which are commonly driven by individual technologies and varying analytical use cases. Therefore, it remains unclear what challenges lakehouses address, how they can be characterized and which technologies can be leveraged to implement them. This paper addresses these issues by providing an extensive overview of concepts and technologies that are related to the lakehouse paradigm and by outlining lakehouses as a distinct architectural approach for data platforms. Concepts and technologies from literature with regard to lakehouses are discussed, based on which a conceptual foundation for lakehouses is established. In addition, several popular technologies are evaluated regarding their suitability for the building of lakehouses. All findings are supported and demonstrated with the help of a representative analytics scenario. Typical challenges of conventional data platforms are identified, a new, sharper definition for lakehouses is proposed and technical requirements for lakehouses are derived. As part of an evaluation, these requirements are applied to several popular technologies, of which frameworks for data lakes turn out to be particularly helpful for the construction of lakehouses. Our work provides an overview of the state of the art and a conceptual foundation for the lakehouse paradigm, which can support future research.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2024-01&engl=0}
}
@comment{NOTE(review): abstract contained the mis-encoded sequence "{\^a}€™" (a UTF-8 right single quote read as Latin-1) in "company's"; repaired to a plain apostrophe. No other field changed.}
@article {ART-2023-07,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
   title = {{Introducing the enterprise data marketplace: a platform for democratizing company data}},
   journal = {Journal of Big Data},
   publisher = {Springer Nature},
   volume = {10},
   pages = {1--38},
   type = {Artikel in Zeitschrift},
   month = {November},
   year = {2023},
   issn = {2196-1115},
   doi = {10.1186/s40537-023-00843-z},
   keywords = {Data Catalog; Data Democratization; Data Market; Data Sharing; Enterprise Data Marketplace; Metadata Management},
   language = {Englisch},
   cr-category = {E.m Data Miscellaneous,     H.3.7 Digital Libraries,     H.4.m Information Systems Applications Miscellaneous},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In this big data era, multitudes of data are generated and collected which contain the potential to gain new insights, e.g., for enhancing business models. To leverage this potential through, e.g., data science and analytics projects, the data must be made available. In this context, data marketplaces are used as platforms to facilitate the exchange and thus, the provisioning of data and data-related services. Data marketplaces are mainly studied for the exchange of data between organizations, i.e., as external data marketplaces. Yet, the data collected within a company also has the potential to provide valuable insights for this same company, for instance to optimize business processes. Studies indicate, however, that a significant amount of data within companies remains unused. In this sense, it is proposed to employ an Enterprise Data Marketplace, a platform to democratize data within a company among its employees. Specifics of the Enterprise Data Marketplace, how it can be implemented or how it makes data available throughout a variety of systems like data lakes has not been investigated in literature so far. Therefore, we present the characteristics and requirements of this kind of marketplace. We also distinguish it from other tools like data catalogs, provide a platform architecture and highlight how it integrates with the company's system landscape. The presented concepts are demonstrated through an Enterprise Data Marketplace prototype and an experiment reveals that this marketplace significantly improves the data consumer workflows in terms of efficiency and complexity. This paper is based on several interdisciplinary works combining comprehensive research with practical experience from an industrial perspective. We therefore present the Enterprise Data Marketplace as a distinct marketplace type and provide the basis for establishing it within a company.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-07&engl=0}
}
@comment{NOTE(review): title carried a raw Unicode em dash, replaced with LaTeX-safe "---"; abstract twice contained the doubly mis-encoded sequence "{\^a}{\^a}‚¬{\^a}„˘" (garbled UTF-8 right single quote) in "data's"/"HANDLE's", repaired to plain apostrophes. No other field changed.}
@article {ART-2021-03,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{Modeling metadata in data lakes---A generic model}},
   journal = {Data \& Knowledge Engineering},
   publisher = {Elsevier},
   volume = {136},
   pages = {1--17},
   type = {Artikel in Zeitschrift},
   month = {November},
   year = {2021},
   issn = {0169-023X},
   doi = {10.1016/j.datak.2021.101931},
   keywords = {Metadata management; Metadata model; Data lake; Data management; Data lake zones; Metadata classification},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data contains important knowledge and has the potential to provide new insights. Due to new technological developments such as the Internet of Things, data is generated in increasing volumes. In order to deal with these data volumes and extract the data's value new concepts such as the data lake were created. The data lake is a data management platform designed to handle data at scale for analytical purposes. To prevent a data lake from becoming inoperable and turning into a data swamp, metadata management is needed. To store and handle metadata, a generic metadata model is required that can reflect metadata of any potential metadata management use case, e.g., data versioning or data lineage. However, an evaluation of existent metadata models yields that none so far are sufficiently generic as their design basis is not suited. In this work, we use a different design approach to build HANDLE, a generic metadata model for data lakes. The new metadata model supports the acquisition of metadata on varying granular levels, any metadata categorization, including the acquisition of both metadata that belongs to a specific data element as well as metadata that applies to a broader range of data. HANDLE supports the flexible integration of metadata and can reflect the same metadata in various ways according to the intended utilization. Furthermore, it is created for data lakes and therefore also supports data lake characteristics like data lake zones. With these capabilities HANDLE enables comprehensive metadata management in data lakes. HANDLE's feasibility is shown through the application to an exemplary access-use-case and a prototypical implementation. By comparing HANDLE with existing models we demonstrate that it can provide the same information as the other models as well as adding further capabilities needed for metadata management in data lakes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2021-03&engl=0}
}
@comment{NOTE(review): apparent duplicate — same authors, title, journal, volume, pages and year as ART-2020-11, ART-2020-10 and ART-2020-04 (which additionally carries the DOI and issue number). Kept as-is because other documents may cite this key; confirm and consolidate before distributing this file.}
@article {ART-2020-20,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Lakes auf den Grund gegangen - Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
   journal = {Datenbank Spektrum},
   publisher = {Springer},
   volume = {20},
   pages = {57--69},
   type = {Artikel in Zeitschrift},
   month = {Januar},
   year = {2020},
   keywords = {Data Lakes; Analytics; Stand der Technik; Herausforderungen; Praxisbeispiel},
   language = {Deutsch},
   cr-category = {H.4 Information Systems Applications},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte eines global agierenden Industrieunternehmens genutzt, um bestehende Herausforderungen zu identifizieren und Anforderungen an Data Lakes herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept, 4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche Realisierungsstrategie.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-20&engl=0}
}
@comment{NOTE(review): apparent duplicate of ART-2020-04 (same authors, title, journal, volume, pages, year; ART-2020-04 additionally has DOI and issue number). Also, keyword "Industryerfahrung" mixes English and German — presumably "Industrieerfahrung"; verify against the source record before correcting. Entry left byte-identical since other documents may cite this key.}
@article {ART-2020-11,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Lakes auf den Grund gegangen - Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
   journal = {Datenbank Spektrum},
   publisher = {Springer-Verlag},
   volume = {20},
   pages = {57--69},
   type = {Artikel in Zeitschrift},
   month = {Januar},
   year = {2020},
   keywords = {Data Lakes; Industryerfahrung},
   language = {Deutsch},
   cr-category = {H.2.1 Database Management Logical Design},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte eines global agierenden Industrieunternehmens genutzt, um bestehende Herausforderungen zu identifizieren und Anforderungen an Data Lakes herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept, 4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche Realisierungsstrategie.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-11&engl=0}
}
@comment{NOTE(review): apparent duplicate — identical fields to ART-2020-11 except the key and url, and same work as ART-2020-20/ART-2020-04. Kept as-is because other documents may cite this key; confirm and consolidate before distributing this file.}
@article {ART-2020-10,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Lakes auf den Grund gegangen - Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
   journal = {Datenbank Spektrum},
   publisher = {Springer-Verlag},
   volume = {20},
   pages = {57--69},
   type = {Artikel in Zeitschrift},
   month = {Januar},
   year = {2020},
   keywords = {Data Lakes; Industryerfahrung},
   language = {Deutsch},
   cr-category = {H.2.1 Database Management Logical Design},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte eines global agierenden Industrieunternehmens genutzt, um bestehende Herausforderungen zu identifizieren und Anforderungen an Data Lakes herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept, 4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche Realisierungsstrategie.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-10&engl=0}
}
@comment{NOTE(review): this is the most complete of four records for the same Datenbank-Spektrum article (cf. ART-2020-20/-11/-10): it alone carries doi, number and ee. Prefer citing this key; entry left unchanged.}
@article {ART-2020-04,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Lakes auf den Grund gegangen: Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
   journal = {Datenbank-Spektrum},
   publisher = {Springer},
   volume = {20},
   number = {1},
   pages = {57--69},
   type = {Artikel in Zeitschrift},
   month = {Januar},
   year = {2020},
   doi = {10.1007/s13222-020-00332-0},
   keywords = {Data Lake; Analytics; Stand der Technik; Herausforderungen; Praxisbeispiel},
   language = {Deutsch},
   cr-category = {A.1 General Literature, Introductory and Survey,     E.0 Data General},
   ee = {https://rdcu.be/b0WM8},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte eines global agierenden Industrieunternehmens genutzt, um bestehende Herausforderungen zu identifizieren und Anforderungen an Data Lakes herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept, 4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche Realisierungsstrategie.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-04&engl=0}
}
@comment{NOTE(review): record has doi but no volume/number — presumably captured as an online-first article; verify against the journal issue and complete if available. Entry left unchanged.}
@article {ART-2016-06,
   author = {Christoph Gr{\"o}ger and Christoph Stach and Bernhard Mitschang and Engelbert Westk{\"a}mper},
   title = {{A mobile dashboard for analytics-based information provisioning on the shop floor}},
   journal = {International Journal of Computer Integrated Manufacturing},
   publisher = {Taylor \& Francis Inc.},
   pages = {1--20},
   type = {Artikel in Zeitschrift},
   month = {Mai},
   year = {2016},
   doi = {10.1080/0951192X.2016.1187292},
   keywords = {dashboard; cockpit; process optimisation; data analytics; business intelligence; data mining},
   language = {Englisch},
   cr-category = {H.4.0 Information Systems Applications General,     J.2 Physical Sciences and Engineering},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Today's turbulent global environment requires agility and flexibility of manufacturing companies to stay competitive. Thus, employees have to monitor their performance continuously and react quickly to turbulences which demands real-time information provisioning across all hierarchy levels. However, existing manufacturing IT systems, for example, manufacturing execution systems (MES), do hardly address information needs of individual employees on the shop floor. Besides, they do not exploit advanced analytics to generate novel insights for process optimisation. To address these issues, the operational process dashboard for manufacturing (OPDM) is presented, a mobile data-mining-based dashboard for workers and supervisors on the shop floor. It enables proactive optimisation by providing analytical information anywhere and anytime in the factory. In this paper, first, user groups and conceptual dashboard services are defined. Then, IT design issues of a mobile shop floor application on top of the advanced manufacturing analytics platform are investigated in order to realise the OPDM. This comprises the evaluation of different types of mobile devices, the development of an appropriate context model and the investigation of security issues. Finally, an evaluation in an automotive industry case is presented using a prototype in order to demonstrate the benefits of the OPDM for data-driven process improvement and agility in manufacturing.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2016-06&engl=0}
}
@comment{NOTE(review): series value used a raw Unicode en dash ("IoT – Best Practices"); replaced with LaTeX-safe "--" for classic-BibTeX compatibility. No other field changed.}
@inbook {INBOOK-2021-01,
   author = {Dimitri Petrik and Mathias Mormul and Peter Reimann and Christoph Gr{\"o}ger},
   title = {{Anforderungen f{\"u}r Zeitreihendatenbanken im industriellen IoT}},
   series = {IoT -- Best Practices},
   publisher = {Springer-Verlag},
   pages = {339--377},
   type = {Beitrag in Buch},
   month = {Mai},
   year = {2021},
   keywords = {Zeitreihendaten; Zeitreihendatenbanken; Industrial IoT; Edge Computing; Data Lake; InfluxDB},
   language = {Deutsch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Das industrielle Internet der Dinge (IIoT) integriert Informations- und Kommunikationstechnologien in industrielle Prozesse und erweitert sie durch Echtzeit-Datenanalyse. Hierbei sind sensorbasierte Zeitreihen ein wesentlicher Typ von Daten, die in der industriellen Fertigung generiert werden. Sensorbasierte Zeitreihendaten werden in regelm{\"a}{\ss}igen Abst{\"a}nden generiert und enthalten zus{\"a}tzlich zum Sensorwert einen Zeitstempel. Spezielle Zeitreihen-Datenbanken (eng.: Time Series Databases (TSDB)) sind daf{\"u}r ausgelegt, Zeitreihendaten effizient zu speichern. Wenn TSDBs maschinennah, d. h. in der industriellen Edge, eingesetzt werden, sind Maschinendaten zur {\"U}berwachung zeitkritischer Prozesse aufgrund der niedrigen Latenz schnell verf{\"u}gbar, was die erforderliche Zeit f{\"u}r die Datenverarbeitung reduziert. Andererseits k{\"o}nnen TSDBs auch in den Data Lakes als skalierbaren Datenplattformen zur Speicherung und Analyse von Rohdaten zum Einsatz kommen, um die langfristige Vorhaltung von Zeitreihendaten zu erm{\"o}glichen. Bisherige Untersuchungen zu TSDBs sind bei der Auswahl f{\"u}r den Einsatz in der industriellen Edge und im Data Lake nicht vorhanden. Die meisten verf{\"u}gbaren Benchmarks von TSDBs sind performanceorientiert und ber{\"u}cksichtigen nicht die Randbedingungen einer industriellen Edge oder eines Data Lake. Wir adressieren diese L{\"u}cke und identifizieren funktionale Kriterien f{\"u}r den Einsatz von TSDBs in diesen beiden Umgebungen und bilden somit einen qualitativen Kriterienkatalog. Des Weiteren zeigen wir am Beispiel von InfluxDB, wie dieser Katalog verwendet werden kann, mit dem Ziel die systematische Auswahl einer passenden TSDB f{\"u}r den Einsatz in der Edge und im Data Lake zu unterst{\"u}tzen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2021-01&engl=0}
}
@comment{NOTE(review): entry declared the `series` field twice (book title and LNBIP series), which BibTeX/Biber flag as a repeated field and which silently drops one value; the first occurrence is the containing book's title, so it is renamed to `booktitle` (the LNBIP series/volume pair is kept). Abstract also contained the mis-encoded sequence "{\^a}€™" in "SITAM's"; repaired to a plain apostrophe.}
@inbook {INBOOK-2017-04,
   author = {Laura Kassner and Christoph Gr{\"o}ger and Jan K{\"o}nigsberger and Eva Hoos and Cornelia Kiefer and Christian Weber and Stefan Silcher and Bernhard Mitschang},
   title = {{The Stuttgart IT Architecture for Manufacturing}},
   booktitle = {Enterprise Information Systems: 18th International Conference, ICEIS 2016, Rome, Italy, April 25--28, 2016, Revised Selected Papers},
   publisher = {Springer International Publishing},
   series = {Lecture Notes in Business Information Processing},
   volume = {291},
   pages = {53--80},
   type = {Beitrag in Buch},
   month = {Juni},
   year = {2017},
   isbn = {978-3-319-62386-3},
   doi = {10.1007/978-3-319-62386-3_3},
   language = {Englisch},
   cr-category = {H.4.0 Information Systems Applications General,     D.2.12 Software Engineering Interoperability,     J.2 Physical Sciences and Engineering},
   ee = {https://link.springer.com/chapter/10.1007/978-3-319-62386-3_3},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The global conditions for manufacturing are rapidly changing towards shorter product life cycles, more complexity and more turbulence. The manufacturing industry must meet the demands of this shifting environment and the increased global competition by ensuring high product quality, continuous improvement of processes and increasingly flexible organization. Technological developments towards smart manufacturing create big industrial data which needs to be leveraged for competitive advantages. We present a novel IT architecture for data-driven manufacturing, the Stuttgart IT Architecture for Manufacturing (SITAM). It addresses the weaknesses of traditional manufacturing IT by providing IT systems integration, holistic data analytics and mobile information provisioning. The SITAM surpasses competing reference architectures for smart manufacturing because it has a strong focus on analytics and mobile integration of human workers into the smart production environment and because it includes concrete recommendations for technologies to implement it, thus filling a granularity gap between conceptual and case-based architectures. To illustrate the benefits of the SITAM's prototypical implementation, we present an application scenario for value-added services in the automotive industry.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2017-04&engl=0}
}
@comment{NOTE(review): entry declared the `series` field twice (book title "Enterprise Information Systems" and the LNBIP series), a repeated-field defect that drops one value; the first occurrence is the containing book's title and is renamed to `booktitle`. Record also lacks a `pages` field — unlike the sibling INBOOK entries — verify against the publisher and complete if available.}
@inbook {INBOOK-2015-02,
   author = {Eva Hoos and Christoph Gr{\"o}ger and Stefan Kramer and Bernhard Mitschang},
   title = {{ValueApping: An Analysis Method to Identify Value-Adding Mobile Enterprise Apps in Business Processes}},
   booktitle = {Enterprise Information Systems},
   publisher = {Springer International Publishing},
   series = {Lecture Notes in Business Information Processing},
   volume = {227},
   type = {Beitrag in Buch},
   month = {September},
   year = {2015},
   language = {Englisch},
   cr-category = {H.1.1 Systems and Information Theory},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Mobile enterprise apps provide novel possibilities for the optimization and redesign of business processes, e.g., by the elimination of paper-based data acquisitioning or ubiquitous access to up-to-date information. To leverage these business potentials, a critical success factor is the identification and evaluation of valueadding MEAs based on an analysis of the business process. For this purpose, we present ValueApping, a systematic analysis method to identify usage scenarios for value-adding mobile enterprise apps in business processes and to analyze their business benefits. We describe the different analysis steps and corresponding analysis artifacts of ValueApping and discuss the results of a case-oriented evaluation in the automotive industry.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2015-02&engl=0}
}