@inproceedings {INPROC-2023-06,
   author = {Jan Schneider and Christoph Gr{\"o}ger and Arnold Lutsch and Holger Schwarz and Bernhard Mitschang},
   title = {{Assessing the Lakehouse: Analysis, Requirements and Definition}},
   booktitle = {Proceedings of the 25th International Conference on Enterprise Information Systems, ICEIS 2023, Volume 1, Prague, Czech Republic, April 24-26, 2023},
   editor = {Joaquim Filipe and Michal Smialek and Alexander Brodsky and Slimane Hammoudi},
   address = {Prague},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {44--56},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2023},
   isbn = {978-989-758-648-4},
   issn = {2184-4992},
   doi = {10.5220/0011840500003467},
   keywords = {Lakehouse; Data Warehouse; Data Lake; Data Management; Data Analytics},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems,     H.2.7 Database Administration,     H.2.8 Database Applications},
   ee = {https://www.scitepress.org/PublicationsDetail.aspx?ID=9ydI3Lyl2Fk=,     https://doi.org/10.5220/0011840500003467},
   contact = {jan.schneider@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The digital transformation opens new opportunities for enterprises to optimize their business processes by applying data-driven analysis techniques. For storing and organizing the required huge amounts of data, different types of data platforms have been employed in the past, with data warehouses and data lakes being the most prominent ones. Since they possess rather contrary characteristics and address different types of analytics, companies typically utilize both of them, leading to complex architectures with replicated data and slow analytical processes. To counter these issues, vendors have recently been making efforts to break the boundaries and to combine features of both worlds into integrated data platforms. Such systems are commonly called lakehouses and promise to simplify enterprise analytics architectures by serving all kinds of analytical workloads from a single platform. However, it remains unclear how lakehouses can be characterized, since existing definitions focus almost arbitrarily on individual architectural or functional aspects and are often driven by marketing. In this paper, we assess prevalent definitions for lakehouses and finally propose a new definition, from which several technical requirements for lakehouses are derived. We apply these requirements to several popular data management tools, such as Delta Lake, Snowflake and Dremio, in order to evaluate whether they enable the construction of lakehouses.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-06&engl=0}
}
@inproceedings {INPROC-2023-02,
   author = {Dennis Treder-Tschechlov and Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{Approach to Synthetic Data Generation for Imbalanced Multi-class Problems with Heterogeneous Groups}},
   booktitle = {Tagungsband der 20. Fachtagung Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2023)},
   publisher = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {329--351},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2023},
   keywords = {Machine learning; classification; data generation; real-world data characteristics},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   ee = {https://dl.gi.de/bitstream/handle/20.500.12116/40320/B3-5.pdf?},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {To benchmark novel classification algorithms, these algorithms should be evaluated on data with characteristics that also appear in real-world use cases. Important data characteristics that often lead to challenges for classification approaches are multi-class imbalance and heterogeneous groups. Heterogeneous groups are sets of real-world entities, where the classification patterns may vary among different groups and where the groups are typically imbalanced in the data. Real-world data that comprise these characteristics are usually not publicly available, e.g., because they constitute sensitive patient information or due to privacy concerns. Further, the manifestations of the characteristics cannot be controlled specifically on real-world data. A more rigorous approach is to synthetically generate data such that different manifestations of the characteristics can be controlled as well. However, existing data generators are not able to generate data that feature both data characteristics, i.e., multi-class imbalance and heterogeneous groups. In this paper, we propose an approach that fills this gap as it allows to synthetically generate data that exhibit both characteristics. We make use of a taxonomy model that organizes real-world entities in domain-specific heterogeneous groups to generate data reflecting the characteristics of these groups. Further, we incorporate probability distributions to reflect the imbalances of multiple classes and groups from real-world use cases. The evaluation shows that our approach can generate data that feature the data characteristics multi-class imbalance and heterogeneous groups and that it allows to control different manifestations of these characteristics.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-02&engl=0}
}
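
The generation approach summarized in this abstract lends itself to a compact illustration: sample group and class membership from imbalanced distributions, then draw features from a group-specific pattern per class. The following minimal Python sketch shows this idea; the taxonomy, feature dimensions, and Gaussian patterns are illustrative assumptions, not the authors' generator.

import numpy as np

rng = np.random.default_rng(seed=42)

# Hypothetical taxonomy: two heterogeneous groups, each with its own
# class-specific pattern (here: a Gaussian mean per class).
GROUPS = {
    "sedan": {"weight": 0.8, "class_means": {"ok": [0.0, 0.0], "faulty": [3.0, 3.0]}},
    "truck": {"weight": 0.2, "class_means": {"ok": [5.0, 0.0], "faulty": [5.0, 4.0]}},
}
CLASS_WEIGHTS = {"ok": 0.9, "faulty": 0.1}  # class imbalance (two classes for brevity)

def generate(n):
    """Draw n samples; group and class frequencies follow the distributions above."""
    X, y, g = [], [], []
    group_names, group_p = list(GROUPS), [GROUPS[k]["weight"] for k in GROUPS]
    class_names, class_p = list(CLASS_WEIGHTS), list(CLASS_WEIGHTS.values())
    for _ in range(n):
        grp = rng.choice(group_names, p=group_p)   # imbalanced group membership
        cls = rng.choice(class_names, p=class_p)   # imbalanced class membership
        X.append(rng.normal(loc=GROUPS[grp]["class_means"][cls], scale=1.0))
        y.append(cls)
        g.append(grp)
    return np.array(X), np.array(y), np.array(g)

X, y, g = generate(1000)
print({c: int((y == c).sum()) for c in CLASS_WEIGHTS})  # reflects the imbalance
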
@inproceedings {INPROC-2022-08,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
   title = {{Establishing the Enterprise Data Marketplace: Characteristics, Architecture, and Challenges}},
   booktitle = {Proceedings of the Workshop on Data Science for Data Marketplaces in Conjunction with the 48th International Conference on Very Large Data Bases},
   editor = {Xiaohui Yu and Jian Pei},
   publisher = {-},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--12},
   type = {Workshop-Beitrag},
   month = {September},
   year = {2022},
   language = {Englisch},
   cr-category = {E.m Data Miscellaneous,     H.3.7 Digital Libraries,     H.4.m Information Systems Applications Miscellaneous},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Companies today have increasing amounts of data at their disposal, most of which is not used, leaving the data value unexploited. In order to leverage the data value, the data must be democratized, i.e., made available to the company employees. In this context, the use of enterprise data marketplaces, i.e., platforms for trading data within a company, is proposed. However, specifics of enterprise data marketplaces and how these can be implemented have not been investigated in the literature so far. To shed light on these topics, we illustrate the characteristics of an enterprise data marketplace and highlight the corresponding marketplace requirements. We provide an enterprise data marketplace architecture, discuss how it integrates into a company's system landscape and present an enterprise data marketplace prototype. Finally, we examine organizational and technical challenges which arise when operating a marketplace in the enterprise context. In this paper, we thereby present the enterprise data marketplace as a distinct marketplace type and provide the basis for establishing it within a company.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-08&engl=0}
}
@inproceedings {INPROC-2022-05,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Shopping - How an Enterprise Data Marketplace Supports Data Democratization in Companies}},
   booktitle = {Intelligent Information Systems - CAiSE Forum 2022, Leuven, Belgium, June 6-10, 2022, Proceedings},
   editor = {Jochen De Weerdt and Artem Polyvyanyy},
   address = {Cham},
   publisher = {Springer International Publishing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Business Information Processing (LNBIP)},
   pages = {19--26},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2022},
   doi = {10.1007/978-3-031-07481-3_3},
   keywords = {Data Marketplace; Data Sharing; Data Democratization},
   language = {Englisch},
   cr-category = {H.0 Information Systems General},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {To exploit the company's data value, employees must be able to find, understand and access it. The process of making corporate data available to the majority of the company's employees is referred to as data democratization. In this work, we present the current state and challenges of data democratization in companies, derived from a comprehensive literature study and expert interviews we conducted with a manufacturer. In this context, a data consumer's journey is presented that reflects the required steps, tool types and roles for finding, understanding and accessing data, in addition to revealing three data democratization challenges. To address these challenges, we propose the use of an enterprise data marketplace, a novel type of information system for sharing data within the company. We developed a prototype, based on which a suitability assessment of a data marketplace yields an improved consumer journey and demonstrates that the marketplace addresses the data democratization challenges and is thus suited for realizing data democratization.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-05&engl=0}
}
@inproceedings {INPROC-2021-06,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
   title = {{Enterprise-Wide Metadata Management - An Industry Case on the Current State and Challenges}},
   booktitle = {24th International Conference on Business Information Systems},
   editor = {Witold Abramowicz and S{\"o}ren Auer and El{\.z}bieta Lewa{\'n}ska},
   publisher = {TIB Open Publishing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {269--279},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2021},
   doi = {10.52825/bis.v1i.47},
   language = {Englisch},
   cr-category = {A.0 General Literature, General},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Metadata management is a crucial success factor for companies today, as, for example, it enables exploiting data value fully or enables legal compliance. With the emergence of new concepts, such as the data lake, and new objectives, such as the enterprise-wide sharing of data, metadata management has evolved and now poses a renewed challenge for companies. In this context, we interviewed a globally active manufacturer to reveal how metadata management is implemented in practice today, what challenges companies are faced with, and whether these constitute research gaps. As an outcome, we present the company's metadata management goals and their corresponding solution approaches and challenges. An evaluation of the challenges through a literature and tool review yields three research gaps, which are concerned with the topics: (1) metadata management for data lakes, (2) categorizations and compositions of metadata management tools for comprehensive metadata management, and (3) the use of data marketplaces as metadata-driven exchange platforms within an enterprise. The gaps lay the groundwork for further research activities in the field of metadata management and the industry case represents a starting point for research to realign with real-world industry needs.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-06&engl=0}
}
@inproceedings {INPROC-2021-05,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{The Data Lake Architecture Framework}},
   booktitle = {Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2021), 19. Fachtagung des GI-Fachbereichs Datenbanken und Informationssysteme (DBIS), 13.-17. September 2021, Dresden, Germany},
   publisher = {Gesellschaft f{\"u}r Informatik},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {351--370},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2021},
   doi = {10.18420/btw2021-19},
   language = {Englisch},
   cr-category = {H.4 Information Systems Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {During recent years, data lakes emerged as a way to manage large amounts of heterogeneous data for modern data analytics. Although various work on individual aspects of data lakes exists, there is no comprehensive data lake architecture yet. Concepts that describe themselves as a "data lake architecture" are only partial. In this work, we introduce the data lake architecture framework. It supports the definition of data lake architectures by defining nine architectural aspects, i.e., perspectives on a data lake, such as data storage or data modeling, and by exploring the interdependencies between these aspects. The included methodology helps to choose appropriate concepts to instantiate each aspect. To evaluate the framework, we use it to configure an exemplary data lake architecture for a real-world data lake implementation. This final assessment shows that our framework provides comprehensive guidance in the configuration of a data lake architecture.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-05&engl=0}
}
@inproceedings {INPROC-2021-04,
   author = {Manuel Fritz and Gang Shao and Holger Schwarz},
   title = {{Automatic Selection of Analytic Platforms with ASAP-DM}},
   booktitle = {Proceedings of the 33rd International Conference on Scientific and Statistical Database Management},
   publisher = {ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {220--225},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2021},
   isbn = {9781450384131},
   doi = {10.1145/3468791.3468802},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The plethora of available analytic platforms escalates the difficulty of selecting the most appropriate platform for a certain data mining task and datasets with varying characteristics. Especially novice analysts experience difficulties to keep up with the latest technical developments. In this demo, we present the ASAP-DM framework. ASAP-DM is able to automatically select a well-performing analytic platform for a given data mining task via an intuitive web interface, thus especially supporting novice analysts. The take-aways for demo attendees are: (1) a good understanding of the challenges of various data mining workloads, dataset characteristics, and the effects on the selection of analytic platforms, (2) useful insights on how ASAP-DM internally works, and (3) how to benefit from ASAP-DM for exploratory data analysis.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-04&engl=0}
}
@inproceedings {INPROC-2021-03,
   author = {Dennis Tschechlov and Manuel Fritz and Holger Schwarz},
   title = {{AutoML4Clust: Efficient AutoML for Clustering Analyses}},
   booktitle = {Proceedings of the 24th International Conference on Extending Database Technology (EDBT)},
   publisher = {Online},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--6},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2021},
   doi = {10.5441/002/EDBT.2021.32},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data analysis is a highly iterative process. In order to achieve valuable analysis results, analysts typically execute many configurations, i.e., algorithms and their hyperparameter settings, based on their domain knowledge. While experienced analysts may be able to define small search spaces for promising configurations, especially novice analysts define large search spaces due to their lack of domain knowledge. In the worst case, they perform an exhaustive search throughout the whole search space, resulting in infeasible runtimes. Recent advances in the research area of AutoML address this challenge by supporting novice analysts in the combined algorithm selection and hyperparameter optimization (CASH) problem for supervised learning tasks. However, no such systems exist for unsupervised learning tasks, such as the prevalent task of clustering analysis. In this work, we present our novel AutoML4Clust approach, which efficiently supports novice analysts regarding CASH for clustering analyses. To the best of our knowledge, this is the first thoroughly elaborated approach in this area. Our comprehensive evaluation unveils that AutoML4Clust significantly outperforms several existing approaches, as it achieves considerable speedups for the CASH problem, while still achieving very valuable clustering results.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-03&engl=0}
}
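
To make the CASH problem for clustering concrete: a configuration is a pair of algorithm and hyperparameter setting, and candidates are compared via an internal validity metric. The sketch below states the problem as a naive random search; it only illustrates what AutoML4Clust optimizes, and the paper's own optimizer is considerably more efficient. The algorithm choices and the trial budget are assumptions.

import random

from sklearn.cluster import AgglomerativeClustering, KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

X, _ = make_blobs(n_samples=500, centers=4, random_state=0)

# Search space: (algorithm, hyperparameter setting) configurations.
search_space = [(KMeans, {"n_clusters": k}) for k in range(2, 11)]
search_space += [(AgglomerativeClustering, {"n_clusters": k}) for k in range(2, 11)]

random.seed(0)
best = None
for algo, params in random.sample(search_space, 8):     # budget of 8 trials
    labels = algo(**params).fit_predict(X)
    score = silhouette_score(X, labels)                  # internal validity metric
    if best is None or score > best[0]:
        best = (score, algo.__name__, params)

print(best)  # best (score, algorithm, hyperparameters) found within the budget
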
@inproceedings {INPROC-2021-02,
   author = {Manuel Fritz and Dennis Tschechlov and Holger Schwarz},
   title = {{Efficient Exploratory Clustering Analyses with Qualitative Approximations}},
   booktitle = {Proceedings of the 24th International Conference on Extending Database Technology (EDBT)},
   publisher = {Online},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--6},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2021},
   doi = {10.5441/002/EDBT.2021.31},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Clustering is a fundamental primitive for exploratory data analyses. Yet, finding valuable clustering results for previously unseen datasets is a pivotal challenge. Analysts as well as automated exploration methods often perform an exploratory clustering analysis, i.e., they repeatedly execute a clustering algorithm with varying parameters until valuable results can be found. k-center clustering algorithms, such as k-Means, are commonly used in such exploratory processes. However, in the worst case, each single execution of k-Means requires a super-polynomial runtime, making the overall exploratory process on voluminous datasets infeasible in a reasonable time frame. We propose a novel and efficient approach for approximating results of k-center clustering algorithms, thus supporting analysts in an ad-hoc exploratory process for valuable clustering results. Our evaluation on an Apache Spark cluster unveils that our approach significantly outperforms the regular execution of a k-center clustering algorithm by several orders of magnitude in runtime with a predefinable qualitative demand. Hence, our approach is a strong fit for clustering voluminous datasets in exploratory settings.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-02&engl=0}
}
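
The quality/runtime trade-off described in this abstract can be pictured by capping the iterations of a k-Means run and measuring the SSE gap to the converged result. The fixed iteration cap below is a simplifying stand-in; the paper's approach works with a predefinable qualitative demand rather than a hard-coded budget.

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=5000, centers=8, random_state=2)

# Converged run vs. an approximation capped at a few Lloyd iterations.
full = KMeans(n_clusters=8, n_init=1, random_state=2).fit(X)
approx = KMeans(n_clusters=8, n_init=1, random_state=2, max_iter=5).fit(X)

# Relative SSE gap of the approximation, i.e., the price paid for the speedup.
print((approx.inertia_ - full.inertia_) / full.inertia_)
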
@inproceedings {INPROC-2020-55,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
   title = {{A Zone Reference Model for Enterprise-Grade Data Lake Management}},
   booktitle = {Proceedings of the 24th IEEE Enterprise Computing Conference},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {57--66},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2020},
   keywords = {Data Lake; Zones; Reference Model; Industry Case; Industry Experience},
   language = {Englisch},
   cr-category = {H.4 Information Systems Applications},
   contact = {Senden Sie eine E-Mail an corinna.giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data lakes are on the rise as data platforms for any kind of analytics, from data exploration to machine learning. They achieve the required flexibility by storing heterogeneous data in their raw format, and by avoiding the need for pre-defined use cases. However, storing only raw data is inefficient, as for many applications, the same data processing has to be applied repeatedly. To foster the reuse of processing steps, literature proposes to store data in different degrees of processing in addition to their raw format. To this end, data lakes are typically structured in zones. Various zone models exist, but they are vague and lack assessment. It is unclear which of these zone models is applicable in a practical data lake implementation in enterprises. In this work, we assess existing zone models using requirements derived from multiple representative data analytics use cases of a real-world industry case. We identify the shortcomings of existing work and develop a detailed zone reference model for enterprise-grade data lake management. We assess the reference model's applicability through a prototypical implementation for a real-world enterprise data lake use case. This assessment shows that the zone reference model meets the requirements relevant in practice and is ready for industry use.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-55&engl=0}
}
@inproceedings {INPROC-2020-54,
   author = {Manuel Fritz and Dennis Tschechlov and Holger Schwarz},
   title = {{Learning from Past Observations: Meta-Learning for Efficient Clustering Analyses}},
   booktitle = {Proceedings of 22nd Big Data Analytics and Knowledge Discovery (DaWaK), 2020},
   editor = {Min Song and Il-Yeol Song and Gabriele Kotsis and A Min Tjoa and Ismail Khalil},
   publisher = {Springer, Cham},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Computer Science},
   volume = {12393},
   pages = {364--379},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2020},
   isbn = {978-3-030-59065-9},
   doi = {10.1007/978-3-030-59065-9_28},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Many clustering algorithms require the number of clusters as input parameter prior to execution. Since the "best" number of clusters is most often unknown in advance, analysts typically execute clustering algorithms multiple times with varying parameters and subsequently choose the most promising result. Several methods for an automated estimation of suitable parameters have been proposed. Similar to the procedure of an analyst, these estimation methods draw on repetitive executions of a clustering algorithm with varying parameters. However, when working with voluminous datasets, each single execution tends to be very time-consuming. Especially in today's Big Data era, such a repetitive execution of a clustering algorithm is not feasible for an efficient exploration. We propose a novel and efficient approach to accelerate estimations for the number of clusters in datasets. Our approach relies on the idea of meta-learning and terminates each execution of the clustering algorithm as soon as an expected qualitative demand is met. We show that this new approach is generally applicable, i.e., it can be used with existing estimation methods. Our comprehensive evaluation reveals that our approach is able to speed up the estimation of the number of clusters by an order of magnitude, while still achieving accurate estimates.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-54&engl=0}
}
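
A minimal reading of the meta-learning idea: learn from past executions how many iterations a clustering run typically needs, then terminate new runs after the predicted budget instead of iterating to full convergence. The meta-features and the linear meta-model below are illustrative assumptions, not the paper's design.

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.linear_model import LinearRegression

def meta_features(X):
    # Simple dataset descriptors; real meta-learning uses richer features.
    return [len(X), X.shape[1], float(X.var())]

# "Past observations": datasets whose iterations-to-convergence are known.
history_X, history_y = [], []
for seed in range(5):
    X, _ = make_blobs(n_samples=1000, centers=5, random_state=seed)
    km = KMeans(n_clusters=5, n_init=1, random_state=seed).fit(X)
    history_X.append(meta_features(X))
    history_y.append(km.n_iter_)

meta_model = LinearRegression().fit(history_X, history_y)

# New dataset: stop after the predicted iteration budget.
X_new, _ = make_blobs(n_samples=1000, centers=5, random_state=99)
budget = max(1, int(meta_model.predict([meta_features(X_new)])[0]))
km_new = KMeans(n_clusters=5, n_init=1, max_iter=budget, random_state=0).fit(X_new)
print(budget, km_new.n_iter_)
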
@inproceedings {INPROC-2020-53,
   author = {Manuel Fritz and Michael Behringer and Holger Schwarz},
   title = {{LOG-Means: Efficiently Estimating the Number of Clusters in Large Datasets}},
   booktitle = {Proceedings of the 46th International Conference on Very Large Databases (VLDB)},
   editor = {Magdalena Balazinska and Xiaofang Zhou},
   publisher = {ACM Digital Library},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Proceedings of the VLDB Endowment},
   volume = {13 (12)},
   pages = {2118--2131},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2020},
   issn = {2150-8097},
   doi = {10.14778/3407790.3407813},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Clustering is a fundamental primitive in manifold applications. In order to achieve valuable results, parameters of the clustering algorithm, e.g., the number of clusters, have to be set appropriately, which is a tremendous pitfall. To this end, analysts rely on their domain knowledge in order to define parameter search spaces. While experienced analysts may be able to define a small search space, especially novice analysts often define rather large search spaces due to the lack of in-depth domain knowledge. These search spaces can be explored in different ways by estimation methods for the number of clusters. In the worst case, estimation methods perform an exhaustive search in the given search space, which leads to infeasible runtimes for large datasets and large search spaces. We propose LOG-Means, which is able to overcome these issues of existing methods. We show that LOG-Means provides estimates in sublinear time regarding the defined search space, thus being a strong fit for large datasets and large search spaces. In our comprehensive evaluation on an Apache Spark cluster, we compare LOG-Means to 13 existing estimation methods. The evaluation shows that LOG-Means significantly outperforms these methods in terms of runtime and accuracy. To the best of our knowledge, this is the most systematic comparison on large datasets and search spaces as of today.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-53&engl=0}
}
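
The gist of exploring a parameter search space in sublinear time can be sketched as follows: probe exponentially spaced values of k, then refine only the most promising interval. This is an illustrative heuristic in the spirit of the abstract, not the published LOG-Means algorithm.

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=1000, centers=7, random_state=1)

def sse(k):
    return KMeans(n_clusters=k, n_init=3, random_state=1).fit(X).inertia_

def estimate_k(k_max):
    ks = [2]
    while ks[-1] * 2 <= k_max:          # probe k = 2, 4, 8, ... only
        ks.append(ks[-1] * 2)
    scores = {k: sse(k) for k in ks}
    # Refine inside the interval with the largest relative SSE drop.
    _, lo, hi = max((scores[a] / scores[b], a, b) for a, b in zip(ks, ks[1:]))
    for k in range(lo + 1, hi):
        scores[k] = sse(k)
    # Elbow heuristic over all evaluated k: largest drop to the next k.
    cand = sorted(scores)
    return max((scores[a] / scores[b], b) for a, b in zip(cand, cand[1:]))[1]

print(estimate_k(32))
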
@inproceedings {INPROC-2020-45,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{HANDLE - A Generic Metadata Model for Data Lakes}},
   booktitle = {Big Data Analytics and Knowledge Discovery},
   editor = {Min Song and Il-Yeol Song and Gabriele Kotsis and A Min Tjoa and Ismail Khalil},
   publisher = {Springer Nature Switzerland AG},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Computer Science},
   volume = {12393},
   pages = {73--88},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2020},
   doi = {10.1007/978-3-030-59065-9_7},
   keywords = {Metadata management; Metadata model; Data lake},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2020-45/INPROC-2020-45.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The substantial increase in generated data induced the development of new concepts such as the data lake. A data lake is a large storage repository designed to enable flexible extraction of the data's value. A key aspect of exploiting data value in data lakes is the collection and management of metadata. To store and handle the metadata, a generic metadata model is required that can reflect metadata of any potential metadata management use case, e.g., data versioning or data lineage. However, an evaluation of existent metadata models yields that none so far are sufficiently generic. In this work, we present HANDLE, a generic metadata model for data lakes, which supports the flexible integration of metadata, data lake zones, metadata on various granular levels, and any metadata categorization. With these capabilities HANDLE enables comprehensive metadata management in data lakes. We show HANDLE's feasibility through the application to an exemplary access-use-case and a prototypical implementation. A comparison with existent models yields that HANDLE can reflect the same information and provides additional capabilities needed for metadata management in data lakes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-45&engl=0}
}
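
As a rough illustration of what such a generic metadata model covers, the sketch below encodes metadata attached to data elements at arbitrary granularity, within zones, under arbitrary categorizations. Class and field names are our reading of the abstract, not the HANDLE specification.

from __future__ import annotations
from dataclasses import dataclass, field

@dataclass
class DataElement:
    name: str
    granularity: str          # e.g. "attribute", "table", "dataset"
    zone: str | None = None   # data lake zone the element lives in

@dataclass
class Metadata:
    key: str
    value: str
    categories: set[str] = field(default_factory=set)  # any categorization

@dataclass
class MetadataEntry:
    element: DataElement
    metadata: list[Metadata] = field(default_factory=list)

# Example: lineage and usage metadata on table granularity in the raw zone.
orders = DataElement("orders", granularity="table", zone="raw")
entry = MetadataEntry(orders, [
    Metadata("source", "erp_export", categories={"lineage"}),
    Metadata("last_access", "2020-09-14", categories={"usage"}),
])
print([m.key for m in entry.metadata if "lineage" in m.categories])
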
@inproceedings {INPROC-2019-20,
   author = {Manuel Fritz and Holger Schwarz},
   title = {{Initializing k-Means Efficiently: Benefits for Exploratory Cluster Analysis}},
   booktitle = {On the Move to Meaningful Internet Systems: OTM 2019 Conferences},
   editor = {Herv{\'e} Panetto and Christophe Debruyne and Martin Hepp and Dave Lewis and Claudio Agostino Ardagna and Robert Meersman},
   publisher = {Springer Nature Switzerland AG},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Computer Science (LNCS)},
   volume = {11877},
   pages = {146--163},
   type = {Konferenz-Beitrag},
   month = {Januar},
   year = {2019},
   isbn = {978-3-030-33245-7},
   doi = {10.1007/978-3-030-33246-4_9},
   keywords = {Exploratory cluster analysis; k-Means; Initialization},
   language = {Englisch},
   cr-category = {E.0 Data General,     H.2.8 Database Applications,     H.3.3 Information Search and Retrieval},
   ee = {https://link.springer.com/chapter/10.1007/978-3-030-33246-4_9},
   contact = {manuel.fritz@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data analysis is a highly exploratory task, where various algorithms with different parameters are executed until a solid result is achieved. This is especially evident for cluster analyses, where the number of clusters must be provided prior to the execution of the clustering algorithm. Since this number is rarely known in advance, the algorithm is typically executed several times with varying parameters. Hence, the duration of the exploratory analysis heavily depends on the runtime of each execution of the clustering algorithm. While previous work shows that the initialization of clustering algorithms is crucial for fast and solid results, it solely focuses on a single execution of the clustering algorithm and thereby neglects previous executions. We propose Delta Initialization as an initialization strategy for k-Means in such an exploratory setting. The core idea of this new algorithm is to exploit the clustering results of previous executions in order to enhance the initialization of subsequent executions. We show that this algorithm is well suited for exploratory cluster analysis as considerable speedups can be achieved while additionally achieving superior clustering results compared to state-of-the-art initialization strategies.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-20&engl=0}
}
@inproceedings {INPROC-2019-15,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz},
   title = {{Modeling Data Lakes with Data Vault: Practical Experiences, Assessment, and Lessons Learned}},
   booktitle = {Proceedings of the 38th Conference on Conceptual Modeling (ER 2019)},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--14},
   type = {Konferenz-Beitrag},
   month = {November},
   year = {2019},
   keywords = {Data Lakes; Data Vault; Data Modeling; Industry Experience; Assessment; Lessons Learned},
   language = {Englisch},
   cr-category = {H.2.1 Database Management Logical Design},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data lakes have become popular to enable organization-wide analytics on heterogeneous data from multiple sources. Data lakes store data in their raw format and are often characterized as schema-free. Nevertheless, it turned out that data still need to be modeled, as neglecting data modeling may lead to issues concerning e.g., quality and integration. In current research literature and industry practice, Data Vault is a popular modeling technique for structured data in data lakes. It promises a flexible, extensible data model that preserves data in their raw format. However, hardly any research or assessment exists on the practical usage of Data Vault for modeling data lakes. In this paper, we assess the Data Vault model's suitability for the data lake context, present lessons learned, and investigate success factors for the use of Data Vault. Our discussion is based on the practical usage of Data Vault in a large, global manufacturer's data lake and the insights gained in real-world analytics projects.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-15&engl=0}
}
@inproceedings {INPROC-2019-14,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz},
   title = {{Leveraging the Data Lake - Current State and Challenges}},
   booktitle = {Proceedings of the 21st International Conference on Big Data Analytics and Knowledge Discovery (DaWaK'19)},
   publisher = {Springer Nature},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--10},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2019},
   keywords = {Data Lakes; State of the Art; Challenges},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems,     H.2.8 Database Applications},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The digital transformation leads to massive amounts of heterogeneous data challenging traditional data warehouse solutions in enterprises. In order to exploit these complex data for competitive advantages, the data lake recently emerged as a concept for more flexible and powerful data analytics. However, existing literature on data lakes is rather vague and incomplete, and the various realization approaches that have been proposed neither cover all aspects of data lakes nor do they provide a comprehensive design and realization strategy. Hence, enterprises face multiple challenges when building data lakes. To address these shortcomings, we investigate existing data lake literature and discuss various design and realization aspects for data lakes, such as governance or data models. Based on these insights, we identify challenges and research gaps concerning (1) data lake architecture, (2) data lake governance, and (3) a comprehensive strategy to realize data lakes. These challenges still need to be addressed to successfully leverage the data lake in practice.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-14&engl=0}
}
@inproceedings {INPROC-2019-10,
   author = {Christian Weber and Pascal Hirmer and Peter Reimann and Holger Schwarz},
   title = {{A New Process Model for the Comprehensive Management of Machine Learning Models}},
   booktitle = {Proceedings of the 21st International Conference on Enterprise Information Systems (ICEIS); Heraklion, Crete, Greece, May 3-5, 2019},
   editor = {Joaquim Filipe and Michal Smialek and Alexander Brodsky and Slimane Hammoudi},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {415--422},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2019},
   isbn = {978-989-758-372-8},
   doi = {10.5220/0007725304150422},
   keywords = {Model Management; Machine Learning; Analytics Process},
   language = {Englisch},
   cr-category = {I.2 Artificial Intelligence},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The management of machine learning models is an extremely challenging task. Hundreds of prototypical models are being built and just a few are mature enough to be deployed into operational enterprise information systems. The lifecycle of a model includes an experimental phase in which a model is planned, built and tested. After that, the model enters the operational phase that includes deploying, using, and retiring it. The experimental phase is well known through established process models like CRISP-DM or KDD. However, these models do not detail the interaction between the experimental and the operational phase of machine learning models. In this paper, we provide a new process model to show the interaction points of the experimental and operational phase of a machine learning model. For each step of our process, we discuss the functions which are relevant to managing machine learning models.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-10&engl=0}
}
@inproceedings {INPROC-2018-14,
   author = {Corinna Giebler and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
   title = {{BRAID - A Hybrid Processing Architecture for Big Data}},
   booktitle = {Proceedings of the 7th International Conference on Data Science, Technology and Applications (DATA 2018)},
   publisher = {INSTICC Press},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--8},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2018},
   keywords = {Big Data; IoT; Batch Processing; Stream Processing; Lambda Architecture; Kappa Architecture},
   language = {Englisch},
   cr-category = {D.2.11 Software Engineering Software Architectures,     H.2.4 Database Management Systems,     H.2.8 Database Applications},
   contact = {Senden Sie eine e-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The Internet of Things is applied in many domains and collects vast amounts of data. This data provides access to a lot of knowledge when analyzed comprehensively. However, advanced analysis techniques such as predictive or prescriptive analytics require access to both history data, i.e., long-term persisted data, and real-time data, as well as a joint view on both types of data. State-of-the-art hybrid processing architectures for big data - namely, the Lambda and the Kappa Architecture - support the processing of history data and real-time data. However, they lack a tight coupling of the two processing modes. That is, the user has to do a lot of work manually in order to enable a comprehensive analysis of the data. For instance, the user has to combine the results of both processing modes or apply knowledge from one processing mode to the other. Therefore, we introduce a novel hybrid processing architecture for big data, called BRAID. BRAID intertwines the processing of history data and real-time data by adding communication channels between the batch engine and the stream engine. This enables comprehensive analyses to be carried out automatically at a reasonable overhead.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2018-14&engl=0}
}
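
A toy rendering of the architectural idea above: batch and stream engine exchange information over explicit channels, so results of one processing mode feed the other automatically. Real engines would replace the toy threads; the channel names and the threshold example are assumptions, not BRAID's implementation.

import queue
import threading

stream_to_batch = queue.Queue()  # channel: stream results -> batch engine
batch_to_stream = queue.Queue()  # channel: batch results  -> stream engine

def stream_engine(events):
    model = {"threshold": 10}                # initial knowledge
    for e in events:
        if not batch_to_stream.empty():
            model = batch_to_stream.get()    # apply refreshed batch model
        if e > model["threshold"]:
            stream_to_batch.put(e)           # forward outlier to history

def batch_engine():
    history = []
    while True:
        try:
            history.append(stream_to_batch.get(timeout=0.2))
        except queue.Empty:
            break
    if history:                              # recompute the model on history
        batch_to_stream.put({"threshold": max(history)})

t = threading.Thread(target=batch_engine)
t.start()
stream_engine([5, 12, 7, 20])
t.join()
print(batch_to_stream.get())                 # model refreshed from streamed data
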
@inproceedings {INPROC-2015-47,
   author = {Oliver Kopp and Michael Falkenthal and Niklas Hartmann and Frank Leymann and Holger Schwarz and Jessica Thomsen},
   title = {{Towards a Cloud-based Platform Architecture for a Decentralized Market Agent}},
   booktitle = {INFORMATIK 2015},
   editor = {Douglas Cunningham and Petra Hofstedt and Klaus Meer and Ingo Schmitt},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   volume = {P-246},
   pages = {69--80},
   type = {Workshop-Beitrag},
   month = {September},
   year = {2015},
   isbn = {978-3-88579-640-4},
   issn = {1617-5468},
   language = {Englisch},
   cr-category = {J.m Computer Applications Miscellaneous},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {Reorganization of power generation, thereby replacing conventional energy sources by innovative renewable energy sources, demands a change in distribution grid structure and operation. The foreseen Decentralized Market Agent is a new role in the energy market sector accomplishing not only trading on energy and operating reserve markets but also regulating flexibilities at the distribution grid level, such as energy storage and decentralized energy generators, and thereby considering system services and securing system stability. This paper presents requirements on an IT system to support this new role. We design an architecture matching these requirements and show how Cloud computing technology can be used to implement the architecture. This enables data concerning the distribution grid to be automatically gathered and processed by dedicated algorithms, aiming to optimize cost-efficient operation and the development of the distribution grid.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2015-47&engl=0}
}
@inproceedings {INPROC-2015-46,
   author = {Jessica Thomsen and Niklas Hartmann and Florian Klumpp and Thomas Erge and Michael Falkenthal and Oliver Kopp and Frank Leymann and Sven Stando and Nino Turek and Christoph Schlenzig and Holger Schwarz},
   title = {{Darstellung des Konzeptes -- DMA Decentralised Market Agent -- zur Bew{\"a}ltigung zuk{\"u}nftiger Herausforderungen in Verteilnetzen}},
   booktitle = {INFORMATIK 2015},
   editor = {Douglas Cunningham and Petra Hofstedt and Klaus Meer and Ingo Schmitt},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   volume = {P-246},
   pages = {53--67},
   type = {Workshop-Beitrag},
   month = {September},
   year = {2015},
   isbn = {978-3-88579-640-4},
   issn = {1617-5468},
   language = {Deutsch},
   cr-category = {J.m Computer Applications Miscellaneous},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {In der vorliegenden Ver{\"o}ffentlichung wird ein Konzept f{\"u}r einen neuen Marktakteur im Strommarkt vorgestellt, der im zuk{\"u}nftigen Smart Grid als Aggregator und Planer fungieren kann. Dieser Decentralised Market Agent – DMA – soll die Informationen aller vorhandenen Erzeugungs- und Speicheranlagen, Lasten und Netzinformationen auf Verteilnetzebene aggregieren sowie mit lokalen Akteuren und an den zentralen M{\"a}rkten agieren um einen kostenoptimalen Betrieb und Ausbau des Systems Verteilnetzes zu realisieren. Zur Handlungsf{\"a}higkeit dieser neuen Marktrolle bedarf es hochaufl{\"o}sender Messungen im Verteilnetz und einer „real-time“ Aufbereitung der Messdaten. Im vorliegenden Paper sollen das Konzept sowie die notwendigen Bausteine zur Erreichung der Handlungsf{\"a}higkeit des DMA vorgestellt sowie die zuk{\"u}nftig geplanten Untersuchungen erl{\"a}utert werden. Die detaillierte Entwicklung des Konzepts sowie weiterf{\"u}hrende Analysen sind Teil des Projektes NEMAR – Netzbewirtschaftung als neue Marktrolle, gef{\"o}rdert durch BMWi im Rahmen der Forschungsinitiative Zukunftsf{\"a}hige Stromnetze.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2015-46&engl=0}
}
@inproceedings {INPROC-2015-34,
   author = {Pascal Hirmer and Matthias Wieland and Holger Schwarz and Bernhard Mitschang and Uwe Breitenb{\"u}cher and Frank Leymann},
   title = {{SitRS - A Situation Recognition Service based on Modeling and Executing Situation Templates}},
   booktitle = {Proceedings of the 9th Symposium and Summer School On Service-Oriented Computing},
   editor = {Johanna Barzen and Rania Khalaf and Frank Leymann and Bernhard Mitschang},
   publisher = {IBM Research Report},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Technical Paper},
   volume = {RC25564},
   pages = {113--127},
   type = {Konferenz-Beitrag},
   month = {Dezember},
   year = {2015},
   keywords = {Situation Recognition, IoT, Context, Integration, Cloud Computing, OSLC},
   language = {Englisch},
   cr-category = {J.6 Computer-Aided Engineering,     H.3.1 Content Analysis and Indexing},
   ee = {http://domino.research.ibm.com/library/cyberdig.nsf/papers/656B934403848E8A85257F1D00695A63},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Today, the Internet of Things has evolved due to an advanced connectivity of physical objects. Furthermore, Cloud Computing gains more and more interest for the provisioning of services. In this paper, we want to further improve the integration of these two areas by providing a cloud-based situation recognition service – SitRS. This service can be used to integrate real world objects – the things – into the internet by deriving their situational state based on sensors. This enables context-aware applications to detect events in a smart environment. SitRS is a basic service enabling a generic and easy implementation of Smart* applications such as SmartFactorys, SmartCities, SmartHomes. This paper introduces an approach containing a method and a system architecture for the realization of such a service. The core steps of the method are: (i) registration of the sensors, (ii) modeling of the situation, and (iii) execution of the situation recognition. Furthermore, a prototypical implementation of SitRS is presented and evaluated via runtime measurements.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2015-34&engl=0}
}
@inproceedings {INPROC-2015-24,
   author = {Matthias Wieland and Holger Schwarz and Uwe Breitenb{\"u}cher and Frank Leymann},
   title = {{Towards Situation-Aware Adaptive Workflows}},
   booktitle = {Proceedings of the 13th Annual IEEE Intl. Conference on Pervasive Computing and Communications Workshops: 11th Workshop on Context and Activity Modeling and Recognition},
   address = {St. Louis, Missouri, USA},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {32--37},
   type = {Workshop-Beitrag},
   month = {M{\"a}rz},
   year = {2015},
   keywords = {situation-awareness; adaptive-workflows; situation recognition; situation-aware workflow system},
   language = {Englisch},
   cr-category = {H.4.1 Office Automation},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {Workflows are an established IT concept to achieve business goals in a reliable and robust manner. However, the dynamic nature of modern information systems, the upcoming Industry 4.0, and the Internet of Things increase the complexity of modeling robust workflows significantly as various kinds of situations, such as the failure of a production system, have to be considered explicitly. Consequently, modeling workflows in a situation-aware manner is a complex challenge that quickly results in big unmanageable workflow models. To overcome these issues, we present an approach that allows workflows to become situation-aware to automatically adapt their behavior according to the situation they are in. The approach is based on aggregated context information, which has been an important research topic in the last decade to capture information about an environment. We introduce a system that derives high-level situations from lower-level context and sensor information. A situation can be used by different situation-aware workflows to adapt to the current situation in their execution environment. SitOPT enables the detection of situations using different situation-recognition systems, exchange of information about detected situations, optimization of the situation recognition, and runtime adaptation and optimization of situation-aware workflows based on the recognized situations.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2015-24&engl=0}
}
@inproceedings {INPROC-2014-76,
   author = {Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{A Pattern Approach to Conquer the Data Complexity in Simulation Workflow Design}},
   booktitle = {Proceedings of OnTheMove Federated Conferences and Workshops (OTM), 22nd International Conference on Cooperative Information Systems (CoopIS 2014)},
   editor = {R. Meersman et al.},
   address = {Amantea, Italy},
   publisher = {Springer Berlin Heidelberg},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {LNCS},
   volume = {8841},
   pages = {21--38},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2014},
   keywords = {Data Provisioning; Data Management Patterns; SIMPL; Simulation Workflow; Simulation Workflow Design; Workflow; Workflow Design},
   language = {Englisch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Scientific workflows may be used to enable the collaborative implementation of scientific applications across various domains. Since each domain has its own requirements and solutions for data handling, such workflows often have to deal with a highly heterogeneous data environment. This results in an increased complexity of workflow design. As scientists typically design their scientific workflows on their own, this complexity hinders them from concentrating on their core issue, namely the experiments, analyses, or simulations they conduct. In this paper, we present a novel approach to a pattern-based abstraction support for the complex data management in simulation workflows that goes beyond related work in similar research areas. A pattern hierarchy with different abstraction levels enables a separation of concerns according to the skills of different persons involved in workflow design. The goal is that scientists are no longer obliged to specify low-level details of data management in their workflows. We discuss the advantages of this approach and show to what extent it reduces the complexity of simulation workflow design. Furthermore, we illustrate how to map patterns onto executable workflows. Based on a prototypical implementation of three real-world simulations, we evaluate our approach according to relevant requirements.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-76&engl=0}
}
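
The mapping of patterns onto executable workflows mentioned in this abstract can be pictured as a rewriting step: a high-level data-management pattern expands into low-level provisioning tasks, so scientists never specify the latter themselves. Pattern and step names below are hypothetical examples, not the vocabulary of the authors' framework.

PATTERN_RULES = {
    "data-transfer": ["extract(source)", "convert(format)", "load(target)"],
    "data-iteration": ["split(input)", "map(simulation)", "merge(results)"],
}

def rewrite(workflow):
    executable = []
    for task in workflow:
        # Expand known patterns into low-level steps; keep concrete tasks as-is.
        executable.extend(PATTERN_RULES.get(task, [task]))
    return executable

# A simulation workflow modeled with one pattern and one concrete task:
print(rewrite(["data-transfer", "run-bone-simulation"]))
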
@inproceedings {INPROC-2014-52,
   author = {Peter Reimann and Tim Waizenegger and Matthias Wieland and Holger Schwarz},
   title = {{Datenmanagement in der Cloud f{\"u}r den Bereich Simulationen und Wissenschaftliches Rechnen}},
   booktitle = {Proceedings des 2. Workshop Data Management in the Cloud auf der 44. Jahrestagung der Gesellschaft f{\"u}r Informatik e.V. (GI)},
   editor = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   address = {Stuttgart, Deutschland},
   publisher = {Lecture Notes in Informatics (LNI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Workshop-Beitrag},
   month = {September},
   year = {2014},
   language = {Deutsch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {F{\"u}r Organisationen, die Simulationen nicht als ihr Kerngesch{\"a}ft verstehen und sie daher eher sporadisch durchf{\"u}hren, lohnt sich der Betrieb einer eigenen Recheninfrastruktur nur selten. Dies betrifft z.B. kleine und mittlere Unternehmen sowie einige wissenschaftliche Institutionen. Besserung k{\"o}nnen {\"o}ffentliche Cloud-Infrastrukturen als Plattform f{\"u}r die Ausf{\"u}hrung von Simulationen verschaffen. Das Datenmanagement in der Cloud ist aber speziell f{\"u}r den Bereich Simulationen noch weitgehend unerforscht. In diesem Beitrag identifizieren wir daher noch offene Fragestellungen bzgl. des Datenmanagements von Simulationen in der Cloud. Dies betrifft vor allem die Datenbereitstellung und inwieweit nutzer- und simulationsspezifische Anforderungen an das Datenmanagement in der Cloud eingehalten werden k{\"o}nnen. Wir untersuchen Technologien, welche sich diesen Fragestellungen widmen, und diskutieren, ob und wie sie in der Cloud sowie f{\"u}r Simulationen einsetzbar sind. Weiterhin skizzieren wir wichtige zuk{\"u}nftige Forschungsthemen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-52&engl=0}
}
@inproceedings {INPROC-2014-51,
   author = {Peter Reimann and Holger Schwarz},
   title = {{Simulation Workflow Design Tailor-Made for Scientists}},
   booktitle = {Proceedings of the 26th International Conference on Scientific and Statistical Database Management},
   address = {Aalborg, Denmark},
   publisher = {ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Demonstration},
   month = {Juni},
   year = {2014},
   keywords = {Data Provisioning; Data Management Patterns; Simulation Workflow; Simulation Workflow Design},
   language = {Englisch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Scientific workflows have to deal with highly heterogeneous data environments. In particular, they have to carry out complex data provisioning tasks that filter and transform heterogeneous input data in such a way that underlying tools or services can ingest them. This results in a high complexity of workflow design. Scientists often want to design their workflows on their own, but usually do not have the necessary skills to cope with this complexity. Therefore, we have developed a pattern-based approach to workflow design, thereby mainly focusing on workflows that realize numeric simulations. This approach removes the burden from scientists to specify low-level details of data provisioning. In this demonstration, we apply a prototype implementation of our approach to various use cases and show how it makes simulation workflow design tailor-made for scientists.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-51&engl=0}
}
@inproceedings {INPROC-2014-50,
   author = {Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Patterns to Alleviate the Design of Scientific Workflows Exemplified by a Bone Simulation}},
   booktitle = {Proceedings of the 26th International Conference on Scientific and Statistical Database Management},
   address = {Aalborg, Denmark},
   publisher = {ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juni},
   year = {2014},
   keywords = {Data Provisioning; Data Management Patterns; Workflow; SIMPL; Simulation Workflow; BPEL; WS-BPEL},
   language = {Englisch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Scientific workflows often have to process huge data sets in a multiplicity of data formats. For that purpose, they typically embed complex data provisioning tasks that transform these heterogeneous data into formats the underlying tools or services can handle. This results in an increased complexity of workflow design. As scientists typically design their scientific workflows on their own, this complexity hinders them from concentrating on their core issue, namely the experiments, analyses, or simulations they conduct. In this paper, we present the core idea of a pattern-based approach to alleviate the design of scientific workflows. This approach is particularly targeted at the needs of scientists. We exemplify and assess the pattern-based design approach by applying it to a complex scientific workflow realizing a real-world simulation of structure changes in bones.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-50&engl=0}
}
@inproceedings {INPROC-2014-49,
   author = {Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{The Deep Data Warehouse. Link-based Integration and Enrichment of Warehouse Data and Unstructured Content}},
   booktitle = {Proceedings of the 18th IEEE International Enterprise Distributed Object Computing Conference (EDOC), 01-05 September, 2014, Ulm, Germany},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2014},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data warehouses are at the core of enterprise IT and enable the efficient storage and analysis of structured data. Besides, unstructured content, e.g., emails and documents, constitutes more than half of the entire enterprise data and contains a lot of implicit knowledge about warehouse entities. Thus, holistic analytics require the integration of structured warehouse data and unstructured content to generate novel insights. These insights can also be used to enrich the integrated data and to create a new basis for further analytics. Existing integration approaches only support a limited range of analytical applications and require the costly adaptation of the warehouse schema. In this paper, we present the Deep Data Warehouse (DeepDWH), a novel type of data warehouse based on the flexible integration and enrichment of warehouse data and unstructured content, addressing the variety challenge of Big Data. It relies on information-rich instance-level links between warehouse elements and content items, which are represented in a graph-oriented structure. Neither adaptations of the existing warehouse nor the design of an overall federated schema are required. We design a conceptual linking model and develop a logical schema for links based on a property graph. As a proof of concept, we present a prototypical implementation of the DeepDWH including a link store based on a graph database.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-49&engl=0}
}
@inproceedings {INPROC-2014-28,
   author = {Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{Prescriptive Analytics for Recommendation-based Business Process Optimization}},
   booktitle = {Proceedings of the 17th International Conference on Business Information Systems (BIS), 22-23 May, 2014, Larnaca, Cyprus},
   editor = {Witold Abramowicz and Angelika Kokkinaki},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Business Information Processing},
   volume = {176},
   pages = {25--37},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2014},
   keywords = {Prescriptive Analytics, Process Optimization, Process Warehouse, Data Mining, Business Intelligence, Decision Support},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Continuously improved business processes are a central success factor for companies. Yet, existing data analytics do not fully exploit the data generated during process execution. Particularly, they miss prescriptive techniques to transform analysis results into improvement actions. In this paper, we present the data-mining-driven concept of recommendation-based business process optimization on top of a holistic process warehouse. It prescriptively generates action recommendations during process execution to avoid a predicted metric deviation. We discuss data mining techniques and data structures for real-time prediction and recommendation generation and present a proof of concept based on a prototypical implementation in manufacturing.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-28&engl=0}
}
@inproceedings {INPROC-2014-10,
   author = {Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{The Manufacturing Knowledge Repository. Consolidating Knowledge to Enable Holistic Process Knowledge Management in Manufacturing}},
   booktitle = {Proceedings of the 16th International Conference on Enterprise Information Systems (ICEIS), 27-30 April, 2014, Lisbon, Portugal},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2014},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration,     J.1 Administration Data Processing},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The manufacturing industry is faced with strong competition, making the companies' knowledge resources and their systematic management a critical success factor. Yet, existing concepts for the management of process knowledge in manufacturing are characterized by major shortcomings. Particularly, they are either exclusively based on structured knowledge, e.g., formal rules, or on unstructured knowledge, such as documents, and they focus on isolated aspects of manufacturing processes. To address these issues, we present the Manufacturing Knowledge Repository, a holistic repository that consolidates structured and unstructured process knowledge to facilitate knowledge management and process optimization in manufacturing. First, we define requirements, especially the types of knowledge to be handled, e.g., data mining models and text documents. Next, we develop a conceptual repository data model associating knowledge items and process components such as machines and process steps. Furthermore, we discuss implementation issues including storage architecture variants and present both an evaluation of the data model and a proof of concept based on a prototypical implementation.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-10&engl=0}
}
@inproceedings {INPROC-2013-02,
   author = {Peter Reimann and Holger Schwarz},
   title = {{Datenmanagementpatterns in Simulationsworkflows}},
   booktitle = {Proceedings der 15. GI-Fachtagung Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2013)},
   editor = {Gesellschaft f{\"u}r Informatik (GI)},
   address = {Magdeburg},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {279--293},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2013},
   keywords = {Data Provisioning; Data Management Patterns; Workflow; SIMPL; Simulation Workflow; BPEL; WS-BPEL},
   language = {Deutsch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Simulation workflows often have to process large amounts of data that are available in a multitude of proprietary formats. For these data to be processed by the programs and services embedded in a workflow, they have to be transformed into suitable formats. This increases the complexity of workflow modeling, which is typically carried out by the scientists themselves, leaving them less time to concentrate on the core of the actual simulation. To remedy this deficiency, we propose an approach that allows the data provisioning activities in simulation processes to be modeled in an abstract way. Scientists should not have to describe implementation details, but only the core aspects of data provisioning in the form of patterns. The specification of the patterns should be expressed as far as possible in the language of the mathematical simulation models that scientists are familiar with. An extension of the workflow system automatically maps the patterns onto executable workflow fragments that implement the data provisioning. All this reduces the complexity of modeling simulation workflows and increases the productivity of the scientists.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2013-02&engl=0}
}
@inproceedings {INPROC-2012-14,
   author = {Christoph Gr{\"o}ger and Florian Niedermann and Holger Schwarz and Bernhard Mitschang},
   title = {{Supporting Manufacturing Design by Analytics. Continuous Collaborative Process Improvement enabled by the Advanced Manufacturing Analytics Platform}},
   booktitle = {Proceedings of the 2012 16th IEEE International Conference on Computer Supported Cooperative Work in Design (CSCWD), May 23-25, 2012, Wuhan, China},
   editor = {Liang Gao and Weiming Shen and Jean-Paul Barth{\`e}s and Junzhou Luo and Jianming Yong and Wenfeng Li and Weidong Li},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {793--799},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2012},
   isbn = {978-1-4673-1210-3},
   keywords = {Analytics; Data Mining; Process Management; Manufacturing; Process Optimization},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications,     J.1 Administration Data Processing},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The manufacturing industry is faced with global competition, making efficient, effective and continuously improved manufacturing processes a critical success factor. Yet, media discontinuities, the use of isolated analysis methods on local data sets as well as missing means for sharing analysis results cause a collaborative gap in Manufacturing Process Management that prohibits continuous process improvement. To address this challenge, this paper proposes the Advanced Manufacturing Analytics (AdMA) Platform that bridges the gap by integrating operational and process manufacturing data, defining a repository for analysis results and providing indication-based and pattern-based optimization techniques. Both the conceptual architecture underlying the platform and its current implementation are presented in this paper.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2012-14&engl=0}
}
@inproceedings {INPROC-2011-37,
   author = {Sylvia Radesch{\"u}tz and Marko Vrhovnik and Holger Schwarz and Bernhard Mitschang},
   title = {{Exploiting the Symbiotic Aspects of Process and Operational Data for Optimizing Business Processes}},
   booktitle = {Proc. of the 12th IEEE International Conference on Information Reuse and Integration (IRI 2011)},
   address = {Las Vegas, USA},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--6},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2011},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {A profound analysis of all relevant business data in a company is necessary for optimizing business processes effectively. Current analyses typically run either on business process execution data or on operational business data. Correlations among the separate data sets have to be found manually with great effort. However, to achieve a more informative analysis and to fully optimize a company's business, an efficient consolidation of all major data sources is indispensable. Recent matching algorithms are insufficient for this task since they are restricted either to schema or to process matching. We present a new matching framework to combine process data models and operational data models (semi-)automatically for performing such a profound business analysis. We describe this approach and its basic matching rules as well as an experimental study that shows the achieved high recall and precision.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2011-37&engl=0}
}
@inproceedings {INPROC-2011-25,
   author = {Florian Niedermann and Holger Schwarz},
   title = {{Deep Business Optimization: Making Business Process Optimization Theory Work in Practice}},
   booktitle = {Proceedings of the Conference on Business Process Modeling, Development and Support (BPMDS 2011)},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--10},
   type = {Konferenz-Beitrag},
   month = {Juni},
   year = {2011},
   keywords = {Business Process Optimization, Optimization Techniques, Business Process Analytics, Data Mining, Tool Support},
   language = {Englisch},
   cr-category = {H.4.1 Office Automation},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2011-25/INPROC-2011-25.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The success of most of today's businesses is tied to the efficiency and effectiveness of their core processes. This importance has been recognized in research, leading to a wealth of sophisticated process optimization and analysis techniques. Their use in practice is, however, often limited, as both the selection and the application of the appropriate techniques are challenging tasks. Hence, many techniques are not considered, causing potentially significant improvement opportunities to remain unexploited. This paper proposes an approach to addressing this challenge using our deep Business Optimization Platform. By integrating a catalogue of formalized optimization techniques with data analysis and integration capabilities, it assists analysts both with the selection and the application of the most fitting optimization techniques for their specific situation. The paper presents both the concepts underlying this platform and its prototypical implementation.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2011-25&engl=0}
}
@inproceedings {INPROC-2011-24,
   author = {Florian Niedermann and Bernhard Maier and Sylvia Radesch{\"u}tz and Holger Schwarz and Bernhard Mitschang},
   title = {{Automated Process Decision Making based on Integrated Source Data}},
   booktitle = {Proceedings of the 14th International Conference on Business Information Systems (BIS 2011)},
   editor = {Witold Abramowicz},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Business Information Processing},
   pages = {1--10},
   type = {Konferenz-Beitrag},
   month = {Juni},
   year = {2011},
   keywords = {Data Mining, Decision Automation, Data Integration, Business Process Management, Data-driven Processes},
   language = {Englisch},
   cr-category = {H.4.1 Office Automation,     H.2.8 Database Applications,     H.5.2 Information Interfaces and Presentation User Interfaces},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The success of most of today's businesses is tied to the efficiency and effectiveness of their core processes. Yet, two major challenges often prevent optimal processes: First, the analysis techniques applied during the optimization are inadequate and fail to include all relevant data sources. Second, the success depends on the abilities of the individual analysts to spot the right designs amongst a plethora of choices. Our deep Business Optimization Platform addresses these challenges through specialized data integration, analysis and optimization facilities. In this paper, we focus on how it uses formalized process optimization patterns for detecting and implementing process improvements.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2011-24&engl=0}
}
@inproceedings {INPROC-2011-07,
   author = {Peter Reimann and Michael Reiter and Holger Schwarz and Dimka Karastoyanova and Frank Leymann},
   title = {{SIMPL - A Framework for Accessing External Data in Simulation Workflows}},
   booktitle = {Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2011), 14. Fachtagung des GI-Fachbereichs „Datenbanken und Informationssysteme“ (DBIS), Proceedings, 02.-04. M{\"a}rz 2011, Kaiserslautern, Germany},
   editor = {Gesellschaft f{\"u}r Informatik (GI)},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   volume = {180},
   pages = {534--553},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2011},
   isbn = {978-3-88579-274-1},
   keywords = {Data Provisioning; Workflow; Scientific Workflow; Simulation Workflow; BPEL; WS-BPEL; SIMPL},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {Adequate data management and data provisioning are among the most important topics to cope with the information explosion intrinsically associated with simulation applications. Today, data exchange with and between simulation applications is mainly accomplished in a file-style manner. These files show proprietary formats and have to be transformed according to the specific needs of simulation applications. Lots of effort has to be spent to find appropriate data sources and to specify and implement data transformations. In this paper, we present SIMPL – an extensible framework that provides a generic and consolidated abstraction for data management and data provisioning in simulation workflows. We introduce extensions to workflow languages and show how they are used to model the data provisioning for simulation workflows based on data management patterns. Furthermore, we show how the framework supports a uniform access to arbitrary external data in such workflows. This removes the burden from engineers and scientists to specify low-level details of data management for their simulation applications and thus boosts their productivity.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2011-07&engl=0}
}
@inproceedings {INPROC-2009-29,
   author = {Fabian Kaiser and Holger Schwarz and Mih{\'a}ly Jakob},
   title = {{Using Wikipedia-based conceptual contexts to calculate document similarity}},
   booktitle = {ICDS 2009: Proceedings of the 3rd International Conference on Digital Society},
   address = {Cancun, Mexico},
   publisher = {IEEE Computer Society},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {322--327},
   type = {Konferenz-Beitrag},
   month = {Februar},
   year = {2009},
   language = {Englisch},
   cr-category = {H.3 Information Storage and Retrieval,     H.3.3 Information Search and Retrieval},
   ee = {http://dx.doi.org/10.1109/ICDS.2009.7},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Rating the similarity of two or more text documents is an essential task in information retrieval. For example, document similarity can be used to rank search engine results, cluster documents according to topics etc. A major challenge in calculating document similarity originates from the fact that two documents can have the same topic or even mean the same, while they use different wording to describe the content. A sophisticated algorithm therefore will not directly operate on the texts but will have to find a more abstract representation that captures the texts' meaning. In this paper, we propose a novel approach for calculating the similarity of text documents. It builds on conceptual contexts that are derived from content and structure of the Wikipedia hypertext corpus.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2009-29&engl=0}
}
@inproceedings {INPROC-2008-02,
   author = {Marko Vrhovnik and Holger Schwarz and Sylvia Radesch{\"u}tz and Bernhard Mitschang},
   title = {{An Overview of SQL Support in Workflow Products}},
   booktitle = {Proc. of the 24th International Conference on Data Engineering (ICDE 2008), Canc{\'u}n, M{\'e}xico, April 7-12, 2008},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--8},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2008},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In recent years, data management products as well as workflow products have established themselves as indispensable building blocks for advanced IT systems in almost all application areas. Recently, many vendors have created innovative product extensions that combine service-oriented frameworks with powerful workflow and data management capabilities. In this paper, we discuss several workflow products from different vendors with a specific focus on their SQL support. We provide a comparison based on a set of important data management patterns and illustrate the characteristics of various approaches by means of a running example.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2008-02&engl=0}
}
@inproceedings {INPROC-2008-01,
   author = {Marko Vrhovnik and Holger Schwarz and Stephan Ewen and Oliver Suhre},
   title = {{PGM/F: A Framework for the Optimization of Data Processing in Business Processes}},
   booktitle = {Proc. of the 24th International Conference on Data Engineering (ICDE 2008), Canc{\'u}n, M{\'e}xico, April 7-12, 2008},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--4},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2008},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Business process execution languages like BPEL are broadly adopted by industry to integrate the heterogeneous applications and data stores of an enterprise. Leading vendors provide extensions to BPEL that allow for a tight integration of data processing capabilities into the process logic. Business processes exploiting these capabilities show a remarkable potential for optimization. In this demonstration, we present PGM/F, a framework for the optimization of data processing in such business processes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2008-01&engl=0}
}
@inproceedings {INPROC-2007-66,
   author = {Mih{\'a}ly Jakob and Oliver Schiller and Holger Schwarz and Fabian Kaiser},
   title = {{flashWeb: Graphical Modeling of Web Applications for Data Management}},
   booktitle = {Tutorials, posters, panels and industrial contributions at the 26th International Conference on Conceptual Modeling - ER 2007, Auckland, New Zealand, December 2007. Vol. 83},
   editor = {John Grundy and Sven Hartmann and Alberto H. F. Laender and Leszek Maciaszek and John F. Roddick},
   address = {Auckland, New Zealand},
   publisher = {ACS},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {59--64},
   type = {Konferenz-Beitrag},
   month = {Dezember},
   year = {2007},
   isbn = {978-1-920682-64-4},
   keywords = {Model-driven Web engineering; Web application modeling; Code Generation},
   language = {Englisch},
   cr-category = {D.2.3 Software Engineering Coding Tools and Techniques,     D.2.11 Software Engineering Software Architectures,     H.4 Information Systems Applications,     H.5.4 Hypertext/Hypermedia},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2007-66/INPROC-2007-66.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This paper presents flashWeb, a Computer-Aided Web Engineering (CAWE) tool for the model-driven development of web applications that focus on data management. Present-day web applications, like on-line auction systems or enterprise web portals, require comprehensive data access, data processing and data manipulation capabilities. However, existing web application development approaches treat data management operations as second-class citizens. They integrate data operations into existing models or derive them as a by-product of business processes. We argue that data management is an important part of the application logic, hence we capture operations with an additional Operation Model. We show that the explicit modeling of operations provides many benefits that distinguish our solution from other approaches. We present the flashWeb development process utilizing a graphical notation for the models in use, a CAWE tool that supports the creation of the graphical models and a code generator that creates ready-to-run web applications.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2007-66&engl=0}
}
@inproceedings {INPROC-2007-61,
   author = {Fabian Kaiser and Holger Schwarz and Mih{\'a}ly Jakob},
   title = {{EXPOSE: Searching the Web for Expertise}},
   booktitle = {Proceedings of the 30th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, Amsterdam, The Netherlands, July 23-27, 2007.},
   editor = {Wessel Kraaij and Arjen P. de Vries and Charles L. A. Clarke and Norbert Fuhr and Noriko Kando},
   publisher = {ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--1},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2007},
   isbn = {978-1-59593-597-7},
   keywords = {Expert Finding; Search Engine; Information Retrieval; Web Search; Knowledge Management},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In this demonstration we will present EXPOSE, our solution to finding experts on the web. We show how EXPOSE supports the user in diverse tasks throughout the whole search process and how using EXPOSE can improve the result quality compared to ad-hoc searches with common web search engines.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2007-61&engl=0}
}
@inproceedings {INPROC-2007-29,
   author = {Rodrigo Monteiro and Geraldo Zimbrao and Holger Schwarz and Bernhard Mitschang and Jano Souza},
   title = {{DWFIST: Leveraging Calendar-based Pattern Mining in Data Streams}},
   booktitle = {Proc. of the 9th International Conference on Data Warehousing and Knowledge Discovery (DaWaK 2007) Regensburg, Germany, 3-7 September, 2007},
   publisher = {-},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {438--448},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2007},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Calendar-based pattern mining aims at identifying patterns on specific calendar partitions. Potential calendar partitions are, for example: every Monday, every first working day of each month, every holiday. Providing flexible mining capabilities for calendar-based partitions is especially challenging in a data stream scenario. The calendar partitions of interest are not known a priori and at each point in time only a subset of the detailed data is available. We show how a data warehouse approach can be applied to this problem. The data warehouse that keeps track of frequent itemsets holding on different partitions of the original stream has low storage requirements. Nevertheless, it allows deriving sets of patterns that are complete and precise. This work demonstrates the effectiveness of our approach through a series of experiments.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2007-29&engl=0}
}
@inproceedings {INPROC-2007-28,
   author = {Marko Vrhovnik and Holger Schwarz and Oliver Suhre and Bernhard Mitschang and Volker Markl and Albert Maier and Tobias Kraft},
   title = {{An Approach to Optimize Data Processing in Business Processes}},
   booktitle = {Proc. of the 33rd International Conference on Very Large Data Bases (VLDB 2007), Vienna, Austria, September 23-28, 2007},
   publisher = {-},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--12},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2007},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In order to optimize their revenues and profits, an increasing number of businesses organize their business activities in terms of business processes. Typically, they automate important business tasks by orchestrating a number of applications and data stores. Obviously, the performance of a business process is directly dependent on the efficiency of data access, data processing, and data management. In this paper, we propose a framework for the optimization of data processing in business processes. We introduce a set of rewrite rules that transform a business process in such a way that an improved execution with respect to data management can be achieved without changing the semantics of the original process. These rewrite rules are based on a semi-procedural process graph model that externalizes data dependencies as well as control flow dependencies of a business process. Furthermore, we present a multi-stage control strategy for the optimization process. We illustrate the benefits and opportunities of our approach through a prototype implementation. Our experimental results demonstrate that independent of the underlying database system performance gains of orders of magnitude are achievable by reasoning about data and control in a unified framework.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2007-28&engl=0}
}
@inproceedings {INPROC-2007-105,
   author = {Tobias Kraft and Holger Schwarz and Bernhard Mitschang},
   title = {{A Statistics Propagation Approach to Enable Cost-Based Optimization of Statement Sequences}},
   booktitle = {Proc. of the 11th East European Conference on Advances in Databases and Information Systems (ADBIS 2007), Varna, Bulgaria, September 29 - October 3, 2007},
   publisher = {-},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {267--282},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2007},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2007-105&engl=0}
}
@inproceedings {INPROC-2006-57,
   author = {Christoph Mangold and Holger Schwarz},
   title = {{Documents meet Databases: A System for Intranet Search}},
   booktitle = {13th International Conference on Management of Data (COMAD 2006), Delhi, India, December 14-16, 2006},
   editor = {L. V. S. Lakshmanan and P. Roy and A. K. H. Tung},
   address = {New Delhi},
   publisher = {Tata McGraw-Hill Publishing Company Limited},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {227--230},
   type = {Konferenz-Beitrag},
   month = {Dezember},
   year = {2006},
   isbn = {0-07-063374-6},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2006-57/INPROC-2006-57.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In enterprise intranets, information is encoded in documents and databases. Logically, the information in both worlds is tightly connected; however, on the system level there is usually a large gap. In this paper, we propose a system to retrieve documents in the enterprise intranet. The system is an extension to common text search. It not only considers the content of documents but also exploits the enterprise databases to determine the documents' context.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2006-57&engl=0}
}
@inproceedings {INPROC-2006-56,
   author = {Christoph Mangold and Holger Schwarz and Bernhard Mitschang},
   title = {{Symbiosis in the Intranet: How Document Retrieval Benefits from Database Information}},
   booktitle = {13th International Conference on Management of Data (COMAD 2006), December 14-16, 2006, Delhi, India},
   editor = {L. V. S. Lakshmanan and P. Roy and A. K. H. Tung},
   address = {New Delhi},
   publisher = {Tata McGraw-Hill Publishing Company Limited},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {201--204},
   type = {Konferenz-Beitrag},
   month = {Dezember},
   year = {2006},
   isbn = {0-07-063374-6},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2006-56/INPROC-2006-56.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The enterprise information space is split into two hemispheres. Documents contain unstructured or semistructured information; structured information is stored in databases. As regards the content, both kinds of information are complementary parts. However, enterprise information systems usually focus on only one part. Our approach improves document retrieval in the intranet by exploiting the enterprise's databases. In particular, we exploit database information to describe the context of documents and exploit this context to enhance common full text search. In this paper, we show how to model and compute document context and present results on runtime performance.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2006-56&engl=0}
}
@inproceedings {INPROC-2006-52,
   author = {Christoph Mangold and Holger Schwarz and Bernhard Mitschang},
   title = {{u38: A Framework for Database-Supported Enterprise Document-Retrieval}},
   booktitle = {Proceedings of the Tenth International Database Engineering \& Applications Symposium (IDEAS 2006), Delhi, India, December 11-14, 2006},
   publisher = {IEEE Computer Society},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--8},
   type = {Konferenz-Beitrag},
   month = {Dezember},
   year = {2006},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In enterprises, information is encoded in documents and databases. Logically, the information in both worlds is tightly connected; however, on the system level there is usually a large gap. In this paper, we propose a framework that improves document retrieval by exploiting available enterprise databases. In particular, we use database information to model the context of documents and incorporate this context in our search framework. We present our framework architecture, its components and its major interfaces. The framework can be configured and enhanced at well-defined points and, hence, can easily be customized to other domains. We furthermore evaluate its core components. Our experiments show that the context-aware approach significantly improves the quality of search results.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2006-52&engl=0}
}
@inproceedings {INPROC-2006-49,
   author = {Mih{\'a}ly Jakob and Holger Schwarz and Fabian Kaiser and Bernhard Mitschang},
   title = {{Towards an operation model for generated web applications}},
   booktitle = {Workshop Proceedings of the Sixth International Conference on Web Engineering (MDWE 2006), Palo Alto, California, USA, July 2006},
   editor = {Association for Computing Machinery (ACM)},
   address = {New York},
   publisher = {ACM Press},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2006},
   language = {Englisch},
   cr-category = {D.2.3 Software Engineering Coding Tools and Techniques,     D.2.11 Software Engineering Software Architectures,     H.4 Information Systems Applications,     H.5.4 Hypertext/Hypermedia},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This paper describes a new approach for the development of data-intensive web applications that depend on non-trivial data manipulation. E-Commerce web sites, on-line auction systems and large enterprise web portals fall into this category as they require comprehensive data access, data processing and data manipulation capabilities. However, existing methodologies mainly concentrate on modeling content, navigation and presentation aspects of read-only web sites. Approaches that consider modeling data operations incorporate them into existing models resulting in a less clear design. We argue that existing models are not sufficient to express complex operations that access or modify web application content. Therefore, we propose an additional Operation Model defining operations for data-intensive web applications. We also propose the utilization of a web application generator to create an Operation Layer based on this Operation Model.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2006-49&engl=0}
}
@inproceedings {INPROC-2006-48,
   author = {Mih{\'a}ly Jakob and Holger Schwarz and Fabian Kaiser and Bernhard Mitschang},
   title = {{Modeling and Generating Application Logic for Data-Intensive Web Applications}},
   booktitle = {Proceedings of the 6th International Conference on Web Engineering (ICWE 2006), Palo Alto, California, USA, July 2006},
   editor = {Association for Computing Machinery (ACM)},
   address = {New York},
   publisher = {ACM Press},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {77--84},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2006},
   language = {Englisch},
   cr-category = {D.2.3 Software Engineering Coding Tools and Techniques,     D.2.11 Software Engineering Software Architectures,     H.4 Information Systems Applications,     H.5.4 Hypertext/Hypermedia},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This paper presents a new approach for the development of data-intensive web applications that depend on sophisticated application logic. E-Commerce web sites, on-line auction systems and large enterprise web portals fall into this category as they require comprehensive data access, data processing and data manipulation capabilities. However, existing methodologies mainly concentrate on modeling content, navigation and presentation aspects of read-only web sites. In our opinion these models are not sufficient to express complex operations that access or modify web application content. Therefore, we propose an additional Operation Model defining the application logic of a web application. We show that based on this model a significant part of a web application's Operation Layer can be generated, still allowing the manual implementation of arbitrary additional functionality. We evaluate our approach and present experimental results based on a large example application for the area of innovation management.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2006-48&engl=0}
}
@inproceedings {INPROC-2006-14,
   author = {Fabian Kaiser and Holger Schwarz and Mih{\'a}ly Jakob},
   title = {{Finding Experts on the Web}},
   booktitle = {Proceedings of the Second International Conference on Web Information Systems and Technologies, Set{\'u}bal, Portugal, April 11-13, 2006},
   editor = {Jos{\'e} Cordeiro and Vitor Pedrosa and Bruno Encarnacao and Joaquim Filipe},
   publisher = {INSTICC},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {363--368},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2006},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In this paper, we present an integrated approach to finding experts for arbitrary user-defined topics on the World Wide Web. We discuss the special challenges that come along with this issue and why solely applying standard techniques and standard tools like Web search engines is not suitable. We point out the necessity for a dedicated expert search engine based on a Focused Crawler. The main contribution of our work is an approach to integrate standard Web search engines into the process of searching for experts to utilize the search engines' knowledge about content and structure of the Web.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2006-14&engl=0}
}
@inproceedings {INPROC-2005-57,
   author = {Christoph Mangold and Holger Schwarz and Bernhard Mitschang},
   title = {{Improving Intranet Search Engines Using Context Information from Databases}},
   booktitle = {Proceedings of the 14th ACM International Conference on Information and Knowledge Management (CIKM 2005), Bremen, Germany, October 31 - November 5, 2005},
   editor = {A. Chowdhury and N. Fuhr and M. Ronthaler and H.-J. Schek and W. Teiken},
   publisher = {ACM Press},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {349--350},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2005},
   isbn = {1-59593-140-6},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2005-57/INPROC-2005-57.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Information in enterprises comes in documents and databases. From a semantic viewpoint, both kinds of information are usually tightly connected. In this paper, we propose to enhance common search engines with contextual information retrieved from databases. We establish system requirements and anecdotally demonstrate how documents and database information can be represented as the nodes of a graph. Then, we give an example of how we exploit this graph information for document retrieval.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2005-57&engl=0}
}
@inproceedings {INPROC-2005-36,
   author = {Mih{\'a}ly Jakob and Fabian Kaiser and Holger Schwarz},
   title = {{SEMAFOR: A Framework for an Extensible Scenario Management System}},
   booktitle = {Proc. of the IEEE International Engineering Management Conference (IEMC 2005), St. John's, Newfoundland, September 11-14, 2005},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--5},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2005},
   language = {Englisch},
   cr-category = {H.3 Information Storage and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The development of successful new products and services in highly dynamic business environments has become an extremely difficult task. Innovation managers have to utilize a considerable amount of enterprise-internal and enterprise-external information to judge the potential of new products, services and technologies. Scenario management is a powerful instrument to face this problem. Scenarios represent an intuitive concept to comprehend complex present-day and future situations. The scenario technique is a method for building such scenarios. Assessments of future business environments relying on structured scenarios enable innovation managers to target lucrative market segments and to select promising product ideas. However, diverse enterprise-internal and enterprise-external resources have to be utilized to assist the scenario development process. Last but not least, existing methods are often extremely time-consuming and existing tools for scenario development fail to provide a comprehensive solution as they are limited to certain steps in fixed workflows. In this paper, we propose a modular web-based framework for the flexible and efficient development and handling of scenarios. Key aspects of our framework are module-based enterprise-specific workflows, the integration of distributed human resources into the scenario development process and diverse interfaces to enterprise-internal and enterprise-external information sources. The framework is based on self-contained software modules that cover important steps of the scenario management process. This modularity allows the easy supplementation of existing 'scenario technique' methods with newly developed methods that are incorporated into modules and can be combined in a flexible way to fit enterprise-specific requirements.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2005-36&engl=0}
}
@inproceedings {INPROC-2005-17,
   author = {Rodrigo Salvador Monteiro and Geraldo Zimbrao and Holger Schwarz and Bernhard Mitschang and Jano Moreira De Souza},
   title = {{Building the Data Warehouse of Frequent Itemsets in the DWFIST Approach}},
   booktitle = {Proceedings of the 15th International Symposium on Methodologies for Intelligent Systems Saratoga Springs, New York - May 25-28, 2005},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--9},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2005},
   isbn = {3-540-25878-7},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration,     H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Some data mining tasks can produce such great amounts of data that we have to cope with a new knowledge management problem. Frequent itemset mining fits in this category. Different approaches have been proposed to handle or somehow avoid this problem. All of them have problems and limitations. In particular, most of them need the original data during the analysis phase, which is not feasible for data streams. The DWFIST (Data Warehouse of Frequent ItemSets Tactics) approach aims at providing a powerful environment for the analysis of itemsets and derived patterns, such as association rules, without accessing the original data during the analysis phase. This approach is based on a Data Warehouse of Frequent Itemsets. It provides frequent itemsets in a flexible and efficient way as well as a standardized logical view upon which analytical tools can be developed. This paper presents how such a data warehouse can be built.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2005-17&engl=0}
}
@inproceedings {INPROC-2004-30,
   author = {Tobias Kraft and Holger Schwarz},
   title = {{CHICAGO: A Test and Evaluation Environment for Coarse-Grained Optimization}},
   booktitle = {Proceedings of the 30th International Conference on Very Large Data Bases, Toronto, Canada, August 29th - September 3rd, 2004},
   publisher = {Morgan Kaufmann},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1345--1348},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2004},
   isbn = {0-12-088469-0},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Relational OLAP tools and other database applications generate sequences of SQL statements that are sent to the database server as the result of a single information request issued by a user. Coarse-Grained Optimization is a practical approach for the optimization of such statement sequences based on rewrite rules. In this demonstration, we present the CHICAGO test and evaluation environment, which allows assessing the effectiveness of rewrite rules and control strategies. It includes a lightweight heuristic optimizer that modifies a given statement sequence using a small and variable set of rewrite rules.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2004-30&engl=0}
}
@inproceedings {INPROC-2003-04,
   author = {Tobias Kraft and Holger Schwarz and Ralf Rantzau and Bernhard Mitschang},
   title = {{Coarse-Grained Optimization: Techniques for Rewriting SQL Statement Sequences}},
   booktitle = {Proceedings of 29th International Conference on Very Large Data Bases (VLDB 2003), Berlin, September 9-12, 2003},
   publisher = {Morgan Kaufmann},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Germany},
   pages = {488--499},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2003},
   isbn = {0-12-722442-4},
   keywords = {SQL; Query Optimization; OLAP},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte H{\"o}chstleistungsrechner, Anwendersoftware},
   abstract = {Relational OLAP tools and other database applications generate sequences of SQL statements that are sent to the database server as the result of a single information request provided by a user. Unfortunately, these sequences cannot be processed efficiently by current database systems because they typically optimize and process each statement in isolation. We propose a practical approach for this optimization problem, called ``coarse-grained optimization,'' complementing the conventional query optimization phase. This new approach exploits the fact that statements of a sequence are correlated since they belong to the same information request. A lightweight heuristic optimizer modifies a given statement sequence using a small set of rewrite rules. Since the optimizer is part of a separate system layer, it is independent of but can be tuned to a specific underlying database system. We discuss implementation details and demonstrate that our approach leads to significant performance improvements.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2003-04&engl=0}
}
@inproceedings {INPROC-2001-32,
   author = {Holger Schwarz and Ralf Wagner and Bernhard Mitschang},
   title = {{Improving the Processing of Decision Support Queries: The Case for a DSS Optimizer}},
   booktitle = {Proc. of the 2001 International Database Engineering \& Applications Symposium (IDEAS), July 16-18, 2001},
   editor = {Michel Adiba and Christine Collet and Bipin C. Desai},
   address = {Los Alamitos, Washington, Brussels, Tokyo},
   publisher = {IEEE Computer Society},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Germany},
   pages = {177--186},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2001},
   isbn = {0-7695-1140-6},
   keywords = {Decision Support; OLAP; Data Warehouse},
   language = {Englisch},
   cr-category = {H.4.2 Information Systems Applications Types of Systems},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2001-32/INPROC-2001-32.pdf},
   contact = {holger.schwarz@informatik.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte H{\"o}chstleistungsrechner, Anwendersoftware},
   abstract = {Many decision support applications are built upon data mining and OLAP tools and allow users to answer information requests based on a data warehouse that is managed by a powerful DBMS. In this paper, we focus on tools that generate sequences of SQL statements in order to produce the requested information. Our thorough analysis revealed that many sequences of queries that are generated by commercial tools are not very efficient. An optimized system architecture is suggested for these applications. The main component is a DSS optimizer that accepts previously generated sequences of queries and remodels them according to a set of optimization strategies, before they are executed by the underlying database system. The advantages of this extended architecture are discussed and a couple of appropriate optimization strategies are identified. Experimental results are given, showing that these strategies are appropriate to optimize typical query sequences of an OLAP application.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2001-32&engl=0}
}
@inproceedings {INPROC-1999-01,
   author = {Ralf Rantzau and Holger Schwarz},
   title = {{A Multi-Tier Architecture for High-Performance Data Mining}},
   booktitle = {Proceedings of the Conference Datenbanksysteme in B{\"u}ro, Technik und Wissenschaft (BTW 1999), Freiburg, Germany, March 1999},
   editor = {A. P. Buchmann},
   address = {Berlin, Heidelberg, New York},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Germany},
   series = {Informatik aktuell},
   pages = {151--163},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {1999},
   isbn = {3-540-65606-5},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-1999-01/INPROC-1999-01.ps,     ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-1999-01/INPROC-1999-01.pdf},
   contact = {rrantzau@acm.org},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte H{\"o}chstleistungsrechner, Anwendersoftware},
   abstract = {Data mining has been recognised as an essential element of decision support, which has increasingly become a focus of the database industry. As for all computationally expensive data analysis applications, for example Online Analytical Processing (OLAP), performance is a key factor for usefulness and acceptance in business. In the course of the CRITIKAL project (Client-Server Rule Induction Technology for Industrial Knowledge Acquisition from Large Databases), which is funded by the European Commission, several kinds of architectures for data mining were evaluated with a strong focus on high performance. Specifically, the data mining techniques association rule discovery and decision tree induction were implemented in a prototype. We present the architecture developed by the CRITIKAL consortium and compare it to alternative architectures.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-1999-01&engl=0}
}
@article {ART-2023-07,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
   title = {{Introducing the enterprise data marketplace: a platform for democratizing company data}},
   journal = {Journal of Big Data},
   publisher = {Springer Nature},
   volume = {10},
   pages = {1--38},
   type = {Artikel in Zeitschrift},
   month = {November},
   year = {2023},
   issn = {2196-1115},
   doi = {10.1186/s40537-023-00843-z},
   keywords = {Data Catalog; Data Democratization; Data Market; Data Sharing; Enterprise Data Marketplace; Metadata Management},
   language = {Englisch},
   cr-category = {E.m Data Miscellaneous,     H.3.7 Digital Libraries,     H.4.m Information Systems Applications Miscellaneous},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In this big data era, multitudes of data are generated and collected which contain the potential to gain new insights, e.g., for enhancing business models. To leverage this potential through, e.g., data science and analytics projects, the data must be made available. In this context, data marketplaces are used as platforms to facilitate the exchange and thus, the provisioning of data and data-related services. Data marketplaces are mainly studied for the exchange of data between organizations, i.e., as external data marketplaces. Yet, the data collected within a company also has the potential to provide valuable insights for this same company, for instance to optimize business processes. Studies indicate, however, that a significant amount of data within companies remains unused. In this sense, it is proposed to employ an Enterprise Data Marketplace, a platform to democratize data within a company among its employees. Specifics of the Enterprise Data Marketplace, how it can be implemented or how it makes data available throughout a variety of systems like data lakes have not been investigated in the literature so far. Therefore, we present the characteristics and requirements of this kind of marketplace. We also distinguish it from other tools like data catalogs, provide a platform architecture and highlight how it integrates with the company's system landscape. The presented concepts are demonstrated through an Enterprise Data Marketplace prototype and an experiment reveals that this marketplace significantly improves the data consumer workflows in terms of efficiency and complexity. This paper is based on several interdisciplinary works combining comprehensive research with practical experience from an industrial perspective. We therefore present the Enterprise Data Marketplace as a distinct marketplace type and provide the basis for establishing it within a company.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-07&engl=0}
}
@article {ART-2023-03,
   author = {Dennis Treder-Tschechlov and Manuel Fritz and Holger Schwarz and Bernhard Mitschang},
   title = {{ML2DAC: Meta-Learning to Democratize AutoML for Clustering Analysis}},
   journal = {Proceedings of the ACM on Management of Data (SIGMOD)},
   publisher = {Association for Computing Machinery (ACM)},
   volume = {1},
   number = {2},
   pages = {1--26},
   type = {Artikel in Zeitschrift},
   month = {Juni},
   year = {2023},
   doi = {10.1145/3589289},
   language = {Englisch},
   cr-category = {I.5.3 Pattern Recognition Clustering},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Analysts often struggle with the combined algorithm selection and hyperparameter optimization problem, a.k.a. CASH problem in literature. Typically, they execute several algorithms with varying hyperparameter settings to find configurations that show valuable results. Efficiently finding these configurations is a major challenge. In clustering analyses, analysts face the additional challenge of selecting a cluster validity index that allows them to evaluate clustering results in a purely unsupervised fashion. Many different cluster validity indices exist and each one has its benefits depending on the dataset characteristics. While experienced analysts might address these challenges using their domain knowledge and experience, especially novice analysts struggle with them. In this paper, we propose a new meta-learning approach to address these challenges. Our approach uses knowledge from past clustering evaluations to apply strategies that experienced analysts would exploit. In particular, we use meta-learning to (a) select a suitable clustering validity index, (b) efficiently select well-performing clustering algorithms and hyperparameter configurations, and (c) reduce the search space to suitable clustering algorithms. In the evaluation, we show that our approach significantly outperforms state-of-the-art approaches regarding accuracy and runtime.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-03&engl=0}
}
@article {ART-2023-02,
   author = {Vitali Hirsch and Peter Reimann and Dennis Treder-Tschechlov and Holger Schwarz and Bernhard Mitschang},
   title = {{Exploiting Domain Knowledge to address Class Imbalance and a Heterogeneous Feature Space in Multi-Class Classification}},
   journal = {International Journal on Very Large Data Bases (VLDB-Journal)},
   publisher = {Springer},
   type = {Artikel in Zeitschrift},
   month = {Februar},
   year = {2023},
   keywords = {Classification; Domain knowledge; Multi-class Imbalance; Heterogeneous feature space},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Real-world data of multi-class classification tasks often show complex data characteristics that lead to a reduced classification performance. Major analytical challenges are a high degree of multi-class imbalance within data and a heterogeneous feature space, which increases the number and complexity of class patterns. Existing solutions to classification or data pre-processing only address one of these two challenges in isolation. We propose a novel classification approach that explicitly addresses both challenges of multi-class imbalance and heterogeneous feature space together. As main contribution, this approach exploits domain knowledge in terms of a taxonomy to systematically prepare the training data. Based on an experimental evaluation on both real-world data and several synthetically generated data sets, we show that our approach outperforms any other classification technique in terms of accuracy. Furthermore, it entails considerable practical benefits in real-world use cases, e.g., it reduces rework required in the area of product quality control.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-02&engl=0}
}
@article {ART-2021-05,
   author = {Manuel Fritz and Michael Behringer and Dennis Tschechlov and Holger Schwarz},
   title = {{Efficient exploratory clustering analyses in large-scale exploration processes}},
   journal = {The VLDB Journal},
   editor = {Georgia Koutrika and Ren{\'e}e J. Miller and Kyuseok Shim},
   address = {Berlin, Heidelberg},
   publisher = {Springer Berlin Heidelberg},
   pages = {1--22},
   type = {Artikel in Zeitschrift},
   month = {November},
   year = {2021},
   doi = {10.1007/s00778-021-00716-y},
   issn = {1066-8888},
   keywords = {Exploratory clustering analysis; Exploration; Clustering; Centroid-based clustering},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   contact = {Senden Sie eine E-Mail an manuel.fritz@ipvs.uni-stuttgart.de.},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Clustering is a fundamental primitive in manifold applications. In order to achieve valuable results in exploratory clustering analyses, parameters of the clustering algorithm have to be set appropriately, which is a tremendous pitfall. We observe multiple challenges for large-scale exploration processes. On the one hand, they require specific methods to efficiently explore large parameter search spaces. On the other hand, they often exhibit large runtimes, in particular when large datasets are analyzed using clustering algorithms with super-polynomial runtimes, which repeatedly need to be executed within exploratory clustering analyses. We address these challenges as follows: First, we present LOG-Means and show that it provides estimates for the number of clusters in sublinear time regarding the defined search space, i.e., provably requiring fewer executions of a clustering algorithm than existing methods. Second, we demonstrate how to exploit fundamental characteristics of exploratory clustering analyses in order to significantly accelerate the (repetitive) execution of clustering algorithms on large datasets. Third, we show how these challenges can be tackled at the same time. To the best of our knowledge, this is the first work which simultaneously addresses the above-mentioned challenges. In our comprehensive evaluation, we unveil that our proposed methods significantly outperform state-of-the-art methods, thus especially supporting novice analysts for exploratory clustering analyses in large-scale exploration processes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2021-05&engl=0}
}
@article {ART-2021-03,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{Modeling metadata in data lakes—A generic model}},
   journal = {Data \& Knowledge Engineering},
   publisher = {Elsevier},
   volume = {136},
   pages = {1--17},
   type = {Artikel in Zeitschrift},
   month = {November},
   year = {2021},
   issn = {0169-023X},
   doi = {10.1016/j.datak.2021.101931},
   keywords = {Metadata management; Metadata model; Data lake; Data management; Data lake zones; Metadata classification},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data contains important knowledge and has the potential to provide new insights. Due to new technological developments such as the Internet of Things, data is generated in increasing volumes. In order to deal with these data volumes and extract the data's value new concepts such as the data lake were created. The data lake is a data management platform designed to handle data at scale for analytical purposes. To prevent a data lake from becoming inoperable and turning into a data swamp, metadata management is needed. To store and handle metadata, a generic metadata model is required that can reflect metadata of any potential metadata management use case, e.g., data versioning or data lineage. However, an evaluation of existent metadata models yields that none so far are sufficiently generic, as their design basis is not suited to this purpose. In this work, we use a different design approach to build HANDLE, a generic metadata model for data lakes. The new metadata model supports the acquisition of metadata on varying granular levels, any metadata categorization, including the acquisition of both metadata that belongs to a specific data element and metadata that applies to a broader range of data. HANDLE supports the flexible integration of metadata and can reflect the same metadata in various ways according to the intended utilization. Furthermore, it is created for data lakes and therefore also supports data lake characteristics like data lake zones. With these capabilities HANDLE enables comprehensive metadata management in data lakes. HANDLE's feasibility is shown through the application to an exemplary access-use-case and a prototypical implementation. By comparing HANDLE with existing models we demonstrate that it can provide the same information as the other models as well as adding further capabilities needed for metadata management in data lakes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2021-03&engl=0}
}
@article {ART-2020-04,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Lakes auf den Grund gegangen: Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
   journal = {Datenbank-Spektrum},
   publisher = {Springer},
   volume = {20},
   number = {1},
   pages = {57--69},
   type = {Artikel in Zeitschrift},
   month = {Januar},
   year = {2020},
   doi = {10.1007/s13222-020-00332-0},
   keywords = {Data Lake; Analytics; Stand der Technik; Herausforderungen; Praxisbeispiel},
   language = {Deutsch},
   cr-category = {A.1 General Literature, Introductory and Survey,     E.0 Data General},
   ee = {https://rdcu.be/b0WM8},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte eines global agierenden Industrieunternehmens genutzt, um bestehende Herausforderungen zu identifizieren und Anforderungen an Data Lakes herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept, 4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche Realisierungsstrategie.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-04&engl=0}
}
@article {ART-2019-11,
   author = {Manuel Fritz and Osama Muazzen and Michael Behringer and Holger Schwarz},
   title = {{ASAP-DM: a framework for automatic selection of analytic platforms for data mining}},
   journal = {Software-Intensive Cyber-Physical Systems},
   publisher = {Springer Berlin Heidelberg},
   pages = {1--13},
   type = {Artikel in Zeitschrift},
   month = {August},
   year = {2019},
   issn = {2524-8510 (print), 2524-8529 (electronic)},
   doi = {10.1007/s00450-019-00408-7},
   keywords = {Data mining; Analytic platform; Platform selection},
   language = {Englisch},
   cr-category = {E.0 Data General,     H.2.8 Database Applications,     H.3.3 Information Search and Retrieval},
   contact = {manuel.fritz@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The plethora of analytic platforms escalates the difficulty of selecting the most appropriate analytic platform that fits the needed data mining task, the dataset as well as additional user-defined criteria. Especially analysts, who are rather focused on the analytics domain, experience difficulties in keeping up with the latest developments. In this work, we introduce the ASAP-DM framework, which enables analysts to seamlessly use several platforms, whereas programmers can easily add several platforms to the framework. Furthermore, we investigate how to predict a platform based on specific criteria, such as lowest runtime or resource consumption during the execution of a data mining task. We formulate this task as an optimization problem, which can be solved by today's classification algorithms. We evaluate the proposed framework on several analytic platforms such as Spark, Mahout, and WEKA along with several data mining algorithms for classification, clustering, and association rule discovery. Our experiments unveil that the automatic selection process can save up to 99.71\% of the execution time due to automatically choosing a faster platform.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-11&engl=0}
}
@article {ART-2019-07,
   author = {Manuel Fritz and Michael Behringer and Holger Schwarz},
   title = {{Quality-driven early stopping for explorative cluster analysis for big data}},
   journal = {Software-Intensive Cyber-Physical Systems},
   publisher = {Springer Berlin Heidelberg},
   pages = {1--12},
   type = {Artikel in Zeitschrift},
   month = {Februar},
   year = {2019},
   issn = {2524-8510 (print), 2524-8529 (electronic)},
   doi = {10.1007/s00450-019-00401-0},
   keywords = {Clustering; Big Data; Early Stop; Convergence; Regression},
   language = {Englisch},
   cr-category = {E.0 Data General,     H.2.8 Database Applications,     H.3.3 Information Search and Retrieval},
   ee = {https://link.springer.com/article/10.1007/s00450-019-00401-0},
   contact = {manuel.fritz@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data analysis has become a critical success factor for companies in all areas. Hence, it is necessary to quickly gain knowledge from available datasets, which is becoming especially challenging in times of big data. Typical data mining tasks like cluster analysis are very time consuming even if they run in highly parallel environments like Spark clusters. To support data scientists in explorative data analysis processes, we need techniques to make data mining tasks even more efficient. To this end, we introduce a novel approach to stop clustering algorithms as early as possible while still achieving an adequate quality of the detected clusters. Our approach exploits the iterative nature of many cluster algorithms and uses a metric to decide after which iteration the mining task should stop. We present experimental results based on a Spark cluster using multiple huge datasets. The experiments unveil that our approach is able to accelerate the clustering up to a factor of more than 800 by obliterating many iterations which provide only little gain in quality. This way, we are able to find a good balance between the time required for data analysis and quality of the analysis results.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-07&engl=0}
}
@article {ART-2016-12,
   author = {Pascal Hirmer and Matthias Wieland and Holger Schwarz and Bernhard Mitschang and Uwe Breitenb{\"u}cher and Santiago G{\'o}mez S{\'a}ez and Frank Leymann},
   title = {{Situation recognition and handling based on executing situation templates and situation-aware workflows}},
   journal = {Computing},
   publisher = {Springer},
   pages = {1--19},
   type = {Artikel in Zeitschrift},
   month = {Oktober},
   year = {2016},
   doi = {10.1007/s00607-016-0522-9},
   keywords = {Situation Recognition; IoT; Context; Integration; Cloud Computing; Workflows; Middleware},
   language = {Englisch},
   cr-category = {J.6 Computer-Aided Engineering,     H.3.1 Content Analysis and Indexing},
   ee = {http://dx.doi.org/10.1007/s00607-016-0522-9},
   contact = {pascal.hirmer@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Today, the Internet of Things has evolved due to an advanced interconnectivity of hardware devices equipped with sensors and actuators. Such connected environments are nowadays well-known as smart environments. Famous examples are smart homes, smart cities, and smart factories. Such environments should only be called ``smart'' if they allow monitoring and self-organization. However, this is a great challenge: (1) sensors have to be bound and sensor data have to be efficiently provisioned to enable monitoring of these environments, (2) situations have to be detected based on sensor data, and (3) based on the recognized situations, a reaction has to be triggered to enable self-organization, e.g., through notification delivery or the execution of workflows. In this article, we introduce SitOPT---an approach for situation recognition based on raw sensor data and automated handling of occurring situations through notification delivery or execution of situation-aware workflows. This article is an extended version of the paper ``SitRS - Situation Recognition based on Modeling and Executing Situation Templates'' presented at the 9th Symposium and Summer School of Service-oriented Computing 2015.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2016-12&engl=0}
}
@article {ART-2013-09,
   author = {Sylvia Radesch{\"u}tz and Holger Schwarz and Florian Niedermann},
   title = {{Business impact analysis — a framework for a comprehensive analysis and optimization of business processes}},
   journal = {Computer Science – Research and Development},
   publisher = {Springer},
   pages = {1--18},
   type = {Artikel in Zeitschrift},
   month = {September},
   year = {2013},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The ability to continuously adapt its business processes is crucial for any company to survive in today's dynamic world. In order to accomplish this task, a company needs to profoundly analyze all its business data. This generates the need for data integration and analysis techniques that allow for a comprehensive analysis. A particular challenge when conducting this analysis is the integration of process data generated by workflow engines and operational data that is produced by business applications and stored in data warehouses. Typically, these two types of data are not matched as their acquisition and analysis follows different principles, i.e., a process-oriented view versus a view focusing on business objects. To address this challenge, we introduce a framework that makes it possible to improve business processes based on an integrated view of process data and operational data. We present and evaluate various architectural options for the data warehouse that provides this integrated view based on a specialized federation layer. This integrated view is also reflected in a set of operators that we introduce. We show how these operators ease the definition of analysis queries and how they make it possible to extract hidden optimization patterns using data mining techniques.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2013-09&engl=0}
}
@article {ART-2011-19,
   author = {Bernhard Mitschang and Holger Schwarz},
   title = {{Der Lehrstuhl ”Datenbanken und Informationssysteme” an der Universit{\"a}t Stuttgart stellt sich vor}},
   journal = {Datenbank-Spektrum},
   publisher = {Springer},
   volume = {11},
   number = {3},
   pages = {213--217},
   type = {Artikel in Zeitschrift},
   month = {November},
   year = {2011},
   language = {Deutsch},
   cr-category = {H.2 Database Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In diesem Beitrag stellen wir den Lehrstuhl f{\"u}r Datenbanken und Informationssysteme der Universit{\"a}t Stuttgart unter der Leitung von Prof. Dr. Bernhard Mitschang vor. Nach einem {\"U}berblick {\"u}ber die Forschungsschwerpunkte des Lehrstuhls gehen wir auf ausgew{\"a}hlte aktuelle Forschungsprojekte ein und erl{\"a}utern die Beteiligung an der Lehre in Bachelor- und Masterstudieng{\"a}ngen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2011-19&engl=0}
}
@article {ART-2011-14,
   author = {Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{Design, Implementation, and Evaluation of a Tight Integration of Database and Workflow Engines}},
   journal = {Journal of Information and Data Management},
   editor = {Alberto H. F. Laender and Mirella M. Moro},
   publisher = {SBC - Brazilian Computer Society},
   volume = {2},
   number = {3},
   pages = {353--368},
   type = {Artikel in Zeitschrift},
   month = {Oktober},
   year = {2011},
   issn = {2178-7107},
   keywords = {Data-Intensive Workflow; Improved Local Data Processing; Scientific Workflow; Simulation Workflow},
   language = {Englisch},
   cr-category = {D.2.11 Software Engineering Software Architectures,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Accessing and processing huge amounts of heterogeneous and distributed data are some of the major challenges of data-intensive workflows. Traditionally, the descriptions of such workflows focus on their data flow. Nevertheless, control-flow-oriented workflow languages are increasingly adapted to the needs of data-intensive workflows. This provides a common level of abstraction for both data-intensive workflows and classical orchestration workflows, e.g., business workflows, which then enables a comprehensive optimization across all workflows. However, the problem still remains that workflows described in control-flow-oriented languages tend to be less efficient for data-intensive processes compared to specialized data-flow-oriented approaches. In this paper, we propose a new kind of optimization targeted at data-intensive workflows that are described in control-flow-oriented languages. We show how to improve efficiency of such workflows by introducing various techniques that partition the local data processing tasks to be performed during workflow execution in an improved way. These data processing tasks are either assigned to the workflow engine or to the tightly integrated local database engine. We evaluate the effectiveness of these techniques by means of various test scenarios.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2011-14&engl=0}
}
@article {ART-2011-07,
   author = {Holger Schwarz},
   title = {{Generierung des Datenzugriffs in Anwendungsprogrammen: Anwendungsbereiche und Implementierungstechniken}},
   journal = {Datenbank Spektrum},
   address = {Heidelberg},
   publisher = {Springer},
   volume = {11},
   number = {1},
   pages = {5--14},
   type = {Artikel in Zeitschrift},
   month = {April},
   year = {2011},
   language = {Deutsch},
   cr-category = {H.4 Information Systems Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Datenzugriffe auf externe und heterogene Datenbest{\"a}nde sind ein grundlegender Bestandteil von Anwendungsprogrammen in ganz unterschiedlichen Anwendungsbereichen. Vielfach k{\"o}nnen diese Datenzugriffe nicht {\"u}ber statisch eingebettete Anweisungen realisiert werden, sondern m{\"u}ssen dynamisch generiert werden. In diesem Beitrag wird das Spektrum relevanter Anwendungsbereiche vorgestellt. Ausgehend von einzelnen Systembeispielen werden wichtige Aspekte anfragegenerierender Systeme verallgemeinert. Hierzu wird eine Systemklassifikation vorgestellt und die Bedeutung der Klassifikation insbesondere f{\"u}r Optimierungsaspekte erl{\"a}utert. Ferner werden drei grundlegende Implementierungskonzepte f{\"u}r anfragegenerierende Systeme vorgestellt und deren Eignung f{\"u}r einzelne Anwendungsklassen diskutiert.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2011-07&engl=0}
}
@article {ART-2007-08,
   author = {Fabian Kaiser and Mih{\'a}ly Jakob and Sebastian Wiedersheim and Holger Schwarz},
   title = {{Framework-Unterst{\"u}tzung f{\"u}r aufwendige Websuche}},
   journal = {Datenbank-Spektrum},
   publisher = {dpunkt-Verlag},
   volume = {7},
   number = {23},
   pages = {13--20},
   type = {Artikel in Zeitschrift},
   month = {November},
   year = {2007},
   language = {Deutsch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Die Nutzung des WWW als wichtige Informationsquelle ist heute fester Bestandteil der t{\"a}glichen Arbeit. Komplexe Suchaufgaben resultieren hierbei h{\"a}ufig in nicht trivialen, lang andauernden Suchprozessen, in deren Rahmen gro{\ss}e Datenmengen verwaltet und analysiert werden m{\"u}ssen. Ein Beispiel hierf{\"u}r ist die Suche nach Experten zu einem gegebenen Themenkomplex. Dieser Beitrag stellt das Softwareframework Supernova vor, das derartige Suchprozesse unterst{\"u}tzt. Die flexible und erweiterbare Suchplattform erlaubt es, einen Focused Crawler mit Standardsuchmaschinen zu kombinieren, stellt diverse Analysekomponenten sowie die Infrastruktur f{\"u}r deren Daten- und Informationsaustausch bereit und bildet somit die Basis f{\"u}r eine effiziente Websuche bei komplexen Fragestellungen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2007-08&engl=0}
}
@article {ART-2006-10,
   author = {Mih{\'a}ly Jakob and Fabian Kaiser and Holger Schwarz and Severin Beucker},
   title = {{Generierung von Webanwendungen f{\"u}r das Innovationsmanagement}},
   journal = {it - Information Technology},
   publisher = {Oldenbourg},
   volume = {48},
   number = {4},
   pages = {225--232},
   type = {Artikel in Zeitschrift},
   month = {August},
   year = {2006},
   language = {Deutsch},
   cr-category = {K.6.3 Software Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Der folgende Beitrag gibt einen {\"U}berblick {\"u}ber das vom Bundesministerium f{\"u}r Bildung und Forschung (BMBF) im Schwerpunktprogramm Internet{\"o}konomie gef{\"o}rderte Forschungsprojekt nova-net: Innovation in der Internet{\"o}konomie. Neben dem Forschungsrahmen und den Forschungsfeldern zur Unterst{\"u}tzung nachhaltiger Innovationsprozesse, wird insbesondere auf die Methodenentwicklung und deren informationstechnische Umsetzung im Themenfeld Trendmonitoring im Szenariomanagement eingegangen. Im Mittelpunkt steht hierbei die Erl{\"a}uterung des Szenario-Management-Frameworks SEMAFOR im Zusammenhang mit einer neu entwickelten Methode zur Entwicklung von Szenarien, sowie deren Umsetzung mittels des Webanwendungsgenerators WAGen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2006-10&engl=0}
}
@article {ART-2004-01,
   author = {Holger Schwarz},
   title = {{Konzeptueller und logischer Data-Warehouse-Entwurf: Datenmodelle und Schematypen f{\"u}r Data Mining und OLAP}},
   journal = {Informatik Forschung und Entwicklung},
   publisher = {Springer},
   volume = {18},
   number = {2},
   pages = {53--67},
   type = {Artikel in Zeitschrift},
   month = {Januar},
   year = {2004},
   language = {Deutsch},
   cr-category = {H.2.1 Database Management Logical Design,     H.2.7 Database Administration,     H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Im Data-Warehouse-Bereich gibt es sowohl auf der konzeptuellen als auch auf der logischen Ebene unterschiedliche Modellierungsans{\"a}tze, deren Entwicklung sich in der Regel an typischen Fragestellungen aus dem Bereich des Online Analytical Processing (OLAP) orientierte. Daneben spielen aber auch andere Ans{\"a}tze zur Analyse der Daten in einem Data Warehouse eine bedeutende Rolle. Ein wichtiger Vertreter ist Data Mining, mit dessen Hilfe bislang unbekannte Muster und Zusammenh{\"a}nge in Daten identifiziert werden k{\"o}nnen. Im vorliegenden Artikel wird untersucht, in wieweit sich die in der Literatur vorgeschlagenen konzeptuellen Datenmodelle f{\"u}r ein Data Warehouse eignen, das OLAP- und Data-Mining-Analysen gleicherma{\ss}en unterst{\"u}tzt. Hierzu wird das COCOM-Modell, das auf den Modellierungsm{\"o}glichkeiten verschiedener publizierter Modelle aufbaut, vorgestellt und bewertet. F{\"u}r das logische Schema eines Data Warehouse wird h{\"a}ufig ein sogenanntes Star-Schema oder ein Snowflake-Schema erstellt. F{\"u}r diese und weitere Schematypen wird analysiert, welchen Einflu{\ss} die Wahl des logischen Schemas auf Anwendungen aus den Bereichen OLAP und Data Mining hat. Wichtige Kriterien sind hier unter anderem der Informationsgehalt und die Performanz. Insgesamt zeigt dieser Beitrag, dass das COCOM-Modell und das Star-Schema eine gute Grundlage f{\"u}r den integrierten Einsatz von OLAP und Data-Mining bilden.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2004-01&engl=0}
}
@article {ART-2003-03,
   author = {Holger Schwarz and Tobias Kraft and Ralf Rantzau and Bernhard Mitschang},
   title = {{Optimierung von Anfragesequenzen in Business-Intelligence-Anwendungen}},
   journal = {it - Information Technology},
   address = {M{\"u}nchen},
   publisher = {Oldenbourg},
   volume = {45},
   number = {4},
   pages = {196--202},
   type = {Artikel in Zeitschrift},
   month = {August},
   year = {2003},
   keywords = {Data Warehouse, Business Intelligence, Anfragesequenzen, OLAP, Data Mining},
   language = {Deutsch},
   cr-category = {H.2.4 Database Management Systems,     H.2.7 Database Administration,     H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Zur Analyse der Daten in einem Data Warehouse kommen unterschiedliche Business-Intelligence-Anwendungen zum Einsatz. Ein wichtiger Erfolgsfaktor f{\"u}r deren Nutzung ist die Effizienz, mit der die erstellten Anfragen ausgef{\"u}hrt werden. In diesem Beitrag wird zun{\"a}chst das typische Verarbeitungsszenario f{\"u}r generierte Anfragesequenzen im Bereich Business Intelligence erl{\"a}utert. Darauf aufbauend wird eine Reihe anwendungsneutraler Optimierungsstrategien erl{\"a}utert und bewertet. Anhand von Messergebnissen wird gezeigt, dass es sich insbesondere bei der Restrukturierung von Anfragesequenzen um einen vielversprechenden Ansatz handelt.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2003-03&engl=0}
}
@inbook {INBOOK-2013-02,
   author = {Sylvia Radesch{\"u}tz and Holger Schwarz and Marko Vrhovnik and Bernhard Mitschang},
   title = {{A Combination Framework for Exploiting the Symbiotic Aspects of Process and Operational Data in Business Process Optimization}},
   series = {Information Reuse and Integration in Academia and Industry},
   publisher = {Springer},
   pages = {29--49},
   type = {Beitrag in Buch},
   month = {September},
   year = {2013},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {A profound analysis of all relevant business data in a company is necessary for optimizing business processes effectively. Current analyses typically run either on business process execution data or on operational business data. Correlations among the separate data sets have to be found manually, which requires considerable effort. However, to achieve a more informative analysis and to fully optimize a company's business, an efficient consolidation of all major data sources is indispensable. Existing matching algorithms are insufficient for this task since they are restricted either to schema or to process matching. We present a new matching framework to (semi-)automatically combine process data models and operational data models for performing such a profound business analysis. We describe the algorithms and basic matching rules underlying this approach as well as an experimental study that shows the high recall and precision achieved.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2013-02&engl=0}
}
@inbook {INBOOK-2009-04,
   author = {Rodrigo Salvador Monteiro and Geraldo Zimbr{\~a}o and Jano Moreira de Souza and Holger Schwarz and Bernhard Mitschang},
   title = {{Exploring Calendar-based Pattern Mining in Data Streams}},
   series = {Complex Data Warehousing and Knowledge Discovery for Advanced Retrieval Development: Innovative Methods and Applications},
   publisher = {IGI Global},
   pages = {1--30},
   type = {Beitrag in Buch},
   month = {Juni},
   year = {2009},
   isbn = {978-1-60566-748-5},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This chapter introduces calendar-based pattern mining, which aims at identifying patterns on specific calendar partitions in continuous data streams. The authors present how a data warehouse approach can be applied to leverage calendar-based pattern mining in data streams and how the framework of the DWFIST approach can cope with tight time constraints imposed by data streams, keep storage requirements at a manageable level and, at the same time, support calendar-based frequent itemset mining. The minimum granularity of analysis, parameters of the data warehouse (e.g. mining minimum support) and parameters of the database (e.g. extent size) provide ways to tune the load performance.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2009-04&engl=0}
}
@inbook {INBOOK-2006-02,
   author = {Rodrigo Salvador Monteiro and Geraldo Zimbr{\~a}o and Holger Schwarz and Bernhard Mitschang and Jano Moreira De Souza},
   title = {{DWFIST: The Data Warehouse of Frequent Itemsets Tactics Approach}},
   series = {Processing and Managing Complex Data for Decision Support},
   publisher = {Idea Group Publishing},
   pages = {1--30},
   type = {Beitrag in Buch},
   month = {April},
   year = {2006},
   isbn = {1-59140-655-2},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration,     H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This chapter presents the core of the DWFIST approach, which is concerned with supporting the analysis and exploration of frequent itemsets and derived patterns, e.g. association rules, in transactional datasets. The goal of this new approach is to provide (1) flexible pattern-retrieval capabilities without requiring the original data during the analysis phase, and (2) a standard modeling for data warehouses of frequent itemsets allowing an easier development and reuse of tools for analysis and exploration of itemset-based patterns. Instead of storing the original datasets, our approach organizes frequent itemsets holding on different partitions of the original transactions in a data warehouse that retains sufficient information for future analysis. A running example for mining calendar-based patterns on data streams is presented. Staging area tasks are discussed and standard conceptual and logical schemas are presented. Properties of this standard modeling make it possible to retrieve frequent itemsets holding on any set of partitions along with upper and lower bounds on their frequency counts. Furthermore, precision guarantees for some interestingness measures of association rules are provided as well.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2006-02&engl=0}
}
@inbook {INBOOK-2004-08,
   author = {Peter Breitling and Holger Schwarz and Mirka Zimmermann},
   title = {{Verwaltung der Lehr- und Lerninhalte in der Metadatenbank MITO}},
   series = {Information Technology Online: Online-gest{\"u}tztes Lehren und Lernen in informationstechnischen Studieng{\"a}ngen},
   address = {M{\"u}nster, New York, M{\"u}nchen, Berlin},
   publisher = {Waxmann},
   pages = {187--199},
   type = {Beitrag in Buch},
   month = {Juni},
   year = {2004},
   isbn = {3-8309-1358-3},
   language = {Deutsch},
   cr-category = {H.0 Information Systems General},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Verwaltung der Lehr- und Lerninhalte in der Metadatenbank MITO},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2004-08&engl=0}
}
@proceedings {PROC-2017-05,
   editor = {Bernhard Mitschang and Norbert Ritter and Holger Schwarz and Meike Klettke and Andreas Thor and Oliver Kopp and Matthias Wieland},
   title = {{Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2017), 17. Fachtagung des GI-Fachbereichs ``Datenbanken und Informationssysteme'' (DBIS)}},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {LNI},
   volume = {P266},
   pages = {410},
   type = {Tagungsband},
   month = {M{\"a}rz},
   year = {2017},
   isbn = {978-3-88579-660-2},
   language = {Englisch},
   cr-category = {H.4.1 Office Automation},
   ee = {http://dblp.org/db/conf/btw/btw2017w.html},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme},
   abstract = {Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2017) -- Workshopband},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=PROC-2017-05&engl=0}
}
@proceedings {PROC-2011-01,
   editor = {Theo H{\"a}rder and Wolfgang Lehner and Bernhard Mitschang and Harald Sch{\"o}ning and Holger Schwarz},
   title = {{Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2011)}},
   publisher = {GI},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {760},
   type = {Tagungsband},
   month = {Februar},
   year = {2011},
   isbn = {978-3-88579-274-1},
   language = {Deutsch},
   cr-category = {H.2 Database Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme},
   abstract = {The ``BTW'' is a biennial conference series focusing on a broad range of topics addressing database management for Business, Technology, and Web. BTW 2011, its 14th event, took place in Kaiserslautern from March 2nd to 4th. This volume contains 24 long and 6 short papers selected for presentation at the conference, 9 industrial contributions, 3 papers or abstracts for the invited talks, 12 demonstration proposals, a panel description, and a paper written by the winner of the dissertation award. The subject areas include core database technology such as query optimization and indexing, DBMS-related prediction models, data streams, processing of large data sets, Web-based information extraction, benchmarking and simulation, and others.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=PROC-2011-01&engl=0}
}
@book {BOOK-2010-01,
   author = {Holger Schwarz},
   title = {{Anfragegenerierende Systeme: Anwendungsanalyse, Implementierungs- und Optimierungskonzepte}},
   address = {Wiesbaden},
   publisher = {Vieweg+Teubner},
   pages = {201},
   type = {Buch},
   month = {Juli},
   year = {2010},
   isbn = {978-3-8348-1298-8},
   language = {Deutsch},
   cr-category = {H.4 Information Systems Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme},
   abstract = {Flexible data access is indispensable in current information systems to meet increased user requirements. In a wide variety of application areas, generative approaches are therefore used to provide specific statements for data access. Holger Schwarz discusses approaches to query generation and examines implementation and optimization concepts. He illustrates the generation approaches with concrete application scenarios from areas such as business intelligence, workflow management, and search technologies. The spectrum considered ranges from individual applications to tools that are employed in application development. The second main topic is current approaches to optimizing the complex structures of generated data accesses. These are presented, and their optimization potential is demonstrated using example scenarios.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2010-01&engl=0}
}
@book {BOOK-2007-02,
   editor = {Fabian Kaiser and Sven Schimpf and Holger Schwarz and Mih{\'a}ly Jakob and Severin Beucker},
   title = {{Internetgest{\"u}tzte Expertenidentifikation zur Unterst{\"u}tzung der fr{\"u}hen Innovationsphasen}},
   publisher = {Fraunhofer IRB Verlag},
   pages = {30},
   type = {Buch},
   month = {September},
   year = {2007},
   isbn = {978-3-8167-7448-8},
   language = {Deutsch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme},
   abstract = {This brochure describes a structured approach for identifying and specifying topic areas relevant to an innovation process on and with the help of the Internet and, building on this, for finding experts in these topic areas. The second part presents the IT implementation of this approach to the Internet-based identification of experts. A use case demonstrates how this approach can support companies in planning technology fields during the early innovation phases.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2007-02&engl=0}
}
@book {BOOK-2007-01,
   editor = {Mih{\'a}ly Jakob and Dierk-Oliver Kiehne and Holger Schwarz and Fabian Kaiser and Severin Beucker},
   title = {{Delphigest{\"u}tztes Szenario-Management und -Monitoring}},
   publisher = {Fraunhofer IRB Verlag},
   pages = {28},
   type = {Buch},
   month = {September},
   year = {2007},
   isbn = {978-3-8167-7449-5},
   language = {Deutsch},
   cr-category = {H.4 Information Systems Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme},
   abstract = {This brochure is a result of the research project nova-net: Innovation in the Internet Economy. The nova-net project develops theories, strategies, and instruments to support sustainable product and service innovations in the Internet economy. It is based on the premise that the Internet is both a source and an instrument for sustainable entrepreneurial innovation processes. This brochure presents the approach of Delphi-based scenario management and its software support by SEMAFOR.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2007-01&engl=0}
}
@book {BOOK-2006-02,
   author = {Mih{\'a}ly Jakob and Holger Schwarz and Fabian Kaiser},
   title = {{Technologie-Roadmap}},
   publisher = {Fraunhofer IRB Verlag},
   pages = {92},
   type = {Buch},
   month = {April},
   year = {2006},
   isbn = {3-8167-7047-9},
   language = {Deutsch},
   cr-category = {H.2.4 Database Management Systems,     H.2.7 Database Administration,     H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This technology roadmap identifies base, integration, and application technologies that can support the sustainable design of innovation processes. The introductory chapters present the innovation process and the fundamental topic areas addressed in the research project nova-net, as well as the so-called information workflow, which divides information processing into meaningful phases and enables the systematic solution of innovation problems. Subsequent parts of the work assign technologies relevant to innovation management to the individual information processing phases and address specific problems that deserve particular attention in the topic areas of the research project nova-net.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2006-02&engl=0}
}
@book {BOOK-2006-01,
   author = {Fabian Kaiser and Holger Schwarz and Mih{\'a}ly Jakob},
   title = {{Internetbasierte Expertensuche}},
   publisher = {Fraunhofer IRB Verlag},
   pages = {29},
   type = {Buch},
   month = {April},
   year = {2006},
   isbn = {3-8167-7042-8},
   language = {Deutsch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This working paper discusses methods and techniques for finding persons with expertise in freely definable topic areas. The search space focuses on the Internet, in particular the WWW and e-mail services. It is shown why conventional search engines generally cannot produce satisfactory results. Building on these weaknesses, various techniques are presented whose integration supports an expert search. The focus is on text classification systems, crawlers, and the integration of the knowledge that search engines have about the structure and contents of the World Wide Web.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2006-01&engl=0}
}