@inproceedings {INPROC-2023-06,
   author = {Jan Schneider and Christoph Gr{\"o}ger and Arnold Lutsch and Holger Schwarz and Bernhard Mitschang},
   title = {{Assessing the Lakehouse: Analysis, Requirements and Definition}},
   booktitle = {Proceedings of the 25th International Conference on Enterprise Information Systems, ICEIS 2023, Volume 1, Prague, Czech Republic, April 24-26, 2023},
   editor = {Joaquim Filipe and Michal Smialek and Alexander Brodsky and Slimane Hammoudi},
   address = {Prague},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {44--56},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2023},
   isbn = {978-989-758-648-4},
   issn = {2184-4992},
   doi = {10.5220/0011840500003467},
   keywords = {Lakehouse; Data Warehouse; Data Lake; Data Management; Data Analytics},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems,     H.2.7 Database Administration,     H.2.8 Database Applications},
   ee = {https://www.scitepress.org/PublicationsDetail.aspx?ID=9ydI3Lyl2Fk=,     https://doi.org/10.5220/0011840500003467},
   contact = {jan.schneider@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The digital transformation opens new opportunities for enterprises to optimize their business processes by applying data-driven analysis techniques. For storing and organizing the required huge amounts of data, different types of data platforms have been employed in the past, with data warehouses and data lakes being the most prominent ones. Since they possess rather contrary characteristics and address different types of analytics, companies typically utilize both of them, leading to complex architectures with replicated data and slow analytical processes. To counter these issues, vendors have recently been making efforts to break the boundaries and to combine features of both worlds into integrated data platforms. Such systems are commonly called lakehouses and promise to simplify enterprise analytics architectures by serving all kinds of analytical workloads from a single platform. However, it remains unclear how lakehouses can be characterized, since existing definitions focus almost arbitrarily on individual architectural or functional aspects and are often driven by marketing. In this paper, we assess prevalent definitions for lakehouses and finally propose a new definition, from which several technical requirements for lakehouses are derived. We apply these requirements to several popular data management tools, such as Delta Lake, Snowflake and Dremio, in order to evaluate whether they enable the construction of lakehouses.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-06&engl=0}
}
@inproceedings {INPROC-2023-02,
   author = {Dennis Treder-Tschechlov and Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{Approach to Synthetic Data Generation for Imbalanced Multi-class Problems with Heterogeneous Groups}},
   booktitle = {Tagungsband der 20. Fachtagung Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2023)},
   publisher = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {329--351},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2023},
   keywords = {Machine learning; classification; data generation; real-world data characteristics},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   ee = {https://dl.gi.de/bitstream/handle/20.500.12116/40320/B3-5.pdf?},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {To benchmark novel classification algorithms, these algorithms should be evaluated on data with characteristics that also appear in real-world use cases. Important data characteristics that often lead to challenges for classification approaches are multi-class imbalance and heterogeneous groups. Heterogeneous groups are sets of real-world entities, where the classification patterns may vary among different groups and where the groups are typically imbalanced in the data. Real-world data that comprise these characteristics are usually not publicly available, e.g., because they constitute sensitive patient information or due to privacy concerns. Further, the manifestations of the characteristics cannot be controlled specifically on real-world data. A more rigorous approach is to synthetically generate data such that different manifestations of the characteristics can be controlled as well. However, existing data generators are not able to generate data that feature both data characteristics, i.e., multi-class imbalance and heterogeneous groups. In this paper, we propose an approach that fills this gap as it allows to synthetically generate data that exhibit both characteristics. We make use of a taxonomy model that organizes real-world entities in domain-specific heterogeneous groups to generate data reflecting the characteristics of these groups. Further, we incorporate probability distributions to reflect the imbalances of multiple classes and groups from real-world use cases. The evaluation shows that our approach can generate data that feature the data characteristics multi-class imbalance and heterogeneous groups and that it allows to control different manifestations of these characteristics.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-02&engl=0}
}
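
The generation approach summarized in this abstract lends itself to a compact illustration: sample group and class membership from imbalanced distributions, then draw features from a group-specific pattern per class. The following minimal Python sketch shows this idea; the taxonomy, feature dimensions, and Gaussian patterns are illustrative assumptions, not the authors' generator.

import numpy as np

rng = np.random.default_rng(seed=42)

# Hypothetical taxonomy: two heterogeneous groups, each with its own
# class-specific pattern (here: a Gaussian mean per class).
GROUPS = {
    "sedan": {"weight": 0.8, "class_means": {"ok": [0.0, 0.0], "faulty": [3.0, 3.0]}},
    "truck": {"weight": 0.2, "class_means": {"ok": [5.0, 0.0], "faulty": [5.0, 4.0]}},
}
CLASS_WEIGHTS = {"ok": 0.9, "faulty": 0.1}  # class imbalance (two classes for brevity)

def generate(n):
    """Draw n samples; group and class frequencies follow the distributions above."""
    X, y, g = [], [], []
    group_names, group_p = list(GROUPS), [GROUPS[k]["weight"] for k in GROUPS]
    class_names, class_p = list(CLASS_WEIGHTS), list(CLASS_WEIGHTS.values())
    for _ in range(n):
        grp = rng.choice(group_names, p=group_p)   # imbalanced group membership
        cls = rng.choice(class_names, p=class_p)   # imbalanced class membership
        X.append(rng.normal(loc=GROUPS[grp]["class_means"][cls], scale=1.0))
        y.append(cls)
        g.append(grp)
    return np.array(X), np.array(y), np.array(g)

X, y, g = generate(1000)
print({c: int((y == c).sum()) for c in CLASS_WEIGHTS})  # reflects the imbalance
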
@inproceedings {INPROC-2022-08,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
   title = {{Establishing the Enterprise Data Marketplace: Characteristics, Architecture, and Challenges}},
   booktitle = {Proceedings of the Workshop on Data Science for Data Marketplaces in Conjunction with the 48th International Conference on Very Large Data Bases},
   editor = {Xiaohui Yu and Jian Pei},
   publisher = {-},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--12},
   type = {Workshop-Beitrag},
   month = {September},
   year = {2022},
   language = {Englisch},
   cr-category = {E.m Data Miscellaneous,     H.3.7 Digital Libraries,     H.4.m Information Systems Applications Miscellaneous},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Companies today have increasing amounts of data at their disposal, most of which is not used, leaving the data value unexploited. In order to leverage the data value, the data must be democratized, i.e., made available to the company employees. In this context, the use of enterprise data marketplaces, i.e., platforms for trading data within a company, is proposed. However, specifics of enterprise data marketplaces and how these can be implemented have not been investigated in the literature so far. To shed light on these topics, we illustrate the characteristics of an enterprise data marketplace and highlight the corresponding marketplace requirements. We provide an enterprise data marketplace architecture, discuss how it integrates into a company's system landscape and present an enterprise data marketplace prototype. Finally, we examine organizational and technical challenges which arise when operating a marketplace in the enterprise context. In this paper, we thereby present the enterprise data marketplace as a distinct marketplace type and provide the basis for establishing it within a company.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-08&engl=0}
}
@inproceedings {INPROC-2022-05,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Shopping - How an Enterprise Data Marketplace Supports Data Democratization in Companies}},
   booktitle = {Intelligent Information Systems - CAiSE Forum 2022, Leuven, Belgium, June 6-10, 2022, Proceedings},
   editor = {Jochen De Weerdt and Artem Polyvyanyy},
   address = {Cham},
   publisher = {Springer International Publishing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Business Information Processing (LNBIP)},
   pages = {19--26},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2022},
   doi = {10.1007/978-3-031-07481-3_3},
   keywords = {Data Marketplace; Data Sharing; Data Democratization},
   language = {Englisch},
   cr-category = {H.0 Information Systems General},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {To exploit the company's data value, employees must be able to find, understand and access it. The process of making corporate data available to the majority of the company's employees is referred to as data democratization. In this work, we present the current state and challenges of data democratization in companies, derived from a comprehensive literature study and expert interviews we conducted with a manufacturer. In this context, a data consumer's journey is presented that reflects the required steps, tool types and roles for finding, understanding and accessing data, in addition to revealing three data democratization challenges. To address these challenges, we propose the use of an enterprise data marketplace, a novel type of information system for sharing data within the company. We developed a prototype, based on which a suitability assessment of a data marketplace yields an improved consumer journey and demonstrates that the marketplace addresses the data democratization challenges and is thus suited for realizing data democratization.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-05&engl=0}
}
@inproceedings {INPROC-2021-06,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
   title = {{Enterprise-Wide Metadata Management - An Industry Case on the Current State and Challenges}},
   booktitle = {24th International Conference on Business Information Systems},
   editor = {Witold Abramowicz and S{\"o}ren Auer and El{\.z}bieta Lewa{\'n}ska},
   publisher = {TIB Open Publishing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {269--279},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2021},
   doi = {10.52825/bis.v1i.47},
   language = {Englisch},
   cr-category = {A.0 General Literature, General},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Metadata management is a crucial success factor for companies today, as, for example, it enables exploiting data value fully or enables legal compliance. With the emergence of new concepts, such as the data lake, and new objectives, such as the enterprise-wide sharing of data, metadata management has evolved and now poses a renewed challenge for companies. In this context, we interviewed a globally active manufacturer to reveal how metadata management is implemented in practice today, what challenges companies are faced with, and whether these constitute research gaps. As an outcome, we present the company's metadata management goals and their corresponding solution approaches and challenges. An evaluation of the challenges through a literature and tool review yields three research gaps, which are concerned with the topics: (1) metadata management for data lakes, (2) categorizations and compositions of metadata management tools for comprehensive metadata management, and (3) the use of data marketplaces as metadata-driven exchange platforms within an enterprise. The gaps lay the groundwork for further research activities in the field of metadata management and the industry case represents a starting point for research to realign with real-world industry needs.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-06&engl=0}
}
@inproceedings {INPROC-2021-05,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{The Data Lake Architecture Framework}},
   booktitle = {Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2021), 19. Fachtagung des GI-Fachbereichs Datenbanken und Informationssysteme (DBIS), 13.-17. September 2021, Dresden, Germany},
   publisher = {Gesellschaft f{\"u}r Informatik},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {351--370},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2021},
   doi = {10.18420/btw2021-19},
   language = {Englisch},
   cr-category = {H.4 Information Systems Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {During recent years, data lakes emerged as a way to manage large amounts of heterogeneous data for modern data analytics. Although various work on individual aspects of data lakes exists, there is no comprehensive data lake architecture yet. Concepts that describe themselves as a "data lake architecture" are only partial. In this work, we introduce the data lake architecture framework. It supports the definition of data lake architectures by defining nine architectural aspects, i.e., perspectives on a data lake, such as data storage or data modeling, and by exploring the interdependencies between these aspects. The included methodology helps to choose appropriate concepts to instantiate each aspect. To evaluate the framework, we use it to configure an exemplary data lake architecture for a real-world data lake implementation. This final assessment shows that our framework provides comprehensive guidance in the configuration of a data lake architecture.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-05&engl=0}
}
@inproceedings {INPROC-2021-04,
   author = {Manuel Fritz and Gang Shao and Holger Schwarz},
   title = {{Automatic Selection of Analytic Platforms with ASAP-DM}},
   booktitle = {Proceedings of the 33rd International Conference on Scientific and Statistical Database Management},
   publisher = {ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {220--225},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2021},
   isbn = {9781450384131},
   doi = {10.1145/3468791.3468802},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The plethora of available analytic platforms escalates the difficulty of selecting the most appropriate platform for a certain data mining task and datasets with varying characteristics. Especially novice analysts experience difficulties to keep up with the latest technical developments. In this demo, we present the ASAP-DM framework. ASAP-DM is able to automatically select a well-performing analytic platform for a given data mining task via an intuitive web interface, thus especially supporting novice analysts. The take-aways for demo attendees are: (1) a good understanding of the challenges of various data mining workloads, dataset characteristics, and the effects on the selection of analytic platforms, (2) useful insights on how ASAP-DM internally works, and (3) how to benefit from ASAP-DM for exploratory data analysis.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-04&engl=0}
}
@inproceedings {INPROC-2021-03,
   author = {Dennis Tschechlov and Manuel Fritz and Holger Schwarz},
   title = {{AutoML4Clust: Efficient AutoML for Clustering Analyses}},
   booktitle = {Proceedings of the 24th International Conference on Extending Database Technology (EDBT)},
   publisher = {Online},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--6},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2021},
   doi = {10.5441/002/EDBT.2021.32},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data analysis is a highly iterative process. In order to achieve valuable analysis results, analysts typically execute many configurations, i.e., algorithms and their hyperparameter settings, based on their domain knowledge. While experienced analysts may be able to define small search spaces for promising configurations, especially novice analysts define large search spaces due to their lack of domain knowledge. In the worst case, they perform an exhaustive search throughout the whole search space, resulting in infeasible runtimes. Recent advances in the research area of AutoML address this challenge by supporting novice analysts in the combined algorithm selection and hyperparameter optimization (CASH) problem for supervised learning tasks. However, no such systems exist for unsupervised learning tasks, such as the prevalent task of clustering analysis. In this work, we present our novel AutoML4Clust approach, which efficiently supports novice analysts regarding CASH for clustering analyses. To the best of our knowledge, this is the first thoroughly elaborated approach in this area. Our comprehensive evaluation unveils that AutoML4Clust significantly outperforms several existing approaches, as it achieves considerable speedups for the CASH problem, while still achieving very valuable clustering results.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-03&engl=0}
}
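
To make the CASH problem for clustering concrete: a configuration is a pair of algorithm and hyperparameter setting, and candidates are compared via an internal validity metric. The sketch below states the problem as a naive random search; it only illustrates what AutoML4Clust optimizes, and the paper's own optimizer is considerably more efficient. The algorithm choices and the trial budget are assumptions.

import random

from sklearn.cluster import AgglomerativeClustering, KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

X, _ = make_blobs(n_samples=500, centers=4, random_state=0)

# Search space: (algorithm, hyperparameter setting) configurations.
search_space = [(KMeans, {"n_clusters": k}) for k in range(2, 11)]
search_space += [(AgglomerativeClustering, {"n_clusters": k}) for k in range(2, 11)]

random.seed(0)
best = None
for algo, params in random.sample(search_space, 8):     # budget of 8 trials
    labels = algo(**params).fit_predict(X)
    score = silhouette_score(X, labels)                  # internal validity metric
    if best is None or score > best[0]:
        best = (score, algo.__name__, params)

print(best)  # best (score, algorithm, hyperparameters) found within the budget
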
@inproceedings {INPROC-2021-02,
   author = {Manuel Fritz and Dennis Tschechlov and Holger Schwarz},
   title = {{Efficient Exploratory Clustering Analyses with Qualitative Approximations}},
   booktitle = {Proceedings of the 24th International Conference on Extending Database Technology (EDBT)},
   publisher = {Online},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--6},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2021},
   doi = {10.5441/002/EDBT.2021.31},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Clustering is a fundamental primitive for exploratory data analyses. Yet, finding valuable clustering results for previously unseen datasets is a pivotal challenge. Analysts as well as automated exploration methods often perform an exploratory clustering analysis, i.e., they repeatedly execute a clustering algorithm with varying parameters until valuable results can be found. k-center clustering algorithms, such as k-Means, are commonly used in such exploratory processes. However, in the worst case, each single execution of k-Means requires a super-polynomial runtime, making the overall exploratory process on voluminous datasets infeasible in a reasonable time frame. We propose a novel and efficient approach for approximating results of k-center clustering algorithms, thus supporting analysts in an ad-hoc exploratory process for valuable clustering results. Our evaluation on an Apache Spark cluster unveils that our approach significantly outperforms the regular execution of a k-center clustering algorithm by several orders of magnitude in runtime with a predefinable qualitative demand. Hence, our approach is a strong fit for clustering voluminous datasets in exploratory settings.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2021-02&engl=0}
}
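
The quality/runtime trade-off described in this abstract can be pictured by capping the iterations of a k-Means run and measuring the SSE gap to the converged result. The fixed iteration cap below is a simplifying stand-in; the paper's approach works with a predefinable qualitative demand rather than a hard-coded budget.

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=5000, centers=8, random_state=2)

# Converged run vs. an approximation capped at a few Lloyd iterations.
full = KMeans(n_clusters=8, n_init=1, random_state=2).fit(X)
approx = KMeans(n_clusters=8, n_init=1, random_state=2, max_iter=5).fit(X)

# Relative SSE gap of the approximation, i.e., the price paid for the speedup.
print((approx.inertia_ - full.inertia_) / full.inertia_)
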
@inproceedings {INPROC-2020-55,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
   title = {{A Zone Reference Model for Enterprise-Grade Data Lake Management}},
   booktitle = {Proceedings of the 24th IEEE Enterprise Computing Conference},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {57--66},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2020},
   keywords = {Data Lake; Zones; Reference Model; Industry Case; Industry Experience},
   language = {Englisch},
   cr-category = {H.4 Information Systems Applications},
   contact = {Senden Sie eine E-Mail an corinna.giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data lakes are on the rise as data platforms for any kind of analytics, from data exploration to machine learning. They achieve the required flexibility by storing heterogeneous data in their raw format, and by avoiding the need for pre-defined use cases. However, storing only raw data is inefficient, as for many applications, the same data processing has to be applied repeatedly. To foster the reuse of processing steps, literature proposes to store data in different degrees of processing in addition to their raw format. To this end, data lakes are typically structured in zones. Various zone models exist, but they are vague and lack assessment. It is unclear which of these zone models is applicable in a practical data lake implementation in enterprises. In this work, we assess existing zone models using requirements derived from multiple representative data analytics use cases of a real-world industry case. We identify the shortcomings of existing work and develop a detailed zone reference model for enterprise-grade data lake management. We assess the reference model's applicability through a prototypical implementation for a real-world enterprise data lake use case. This assessment shows that the zone reference model meets the requirements relevant in practice and is ready for industry use.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-55&engl=0}
}
@inproceedings {INPROC-2020-54,
   author = {Manuel Fritz and Dennis Tschechlov and Holger Schwarz},
   title = {{Learning from Past Observations: Meta-Learning for Efficient Clustering Analyses}},
   booktitle = {Proceedings of 22nd Big Data Analytics and Knowledge Discovery (DaWaK), 2020},
   editor = {Min Song and Il-Yeol Song and Gabriele Kotsis and A Min Tjoa and Ismail Khalil},
   publisher = {Springer, Cham},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Computer Science},
   volume = {12393},
   pages = {364--379},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2020},
   isbn = {978-3-030-59065-9},
   doi = {10.1007/978-3-030-59065-9_28},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Many clustering algorithms require the number of clusters as input parameter prior to execution. Since the "best" number of clusters is most often unknown in advance, analysts typically execute clustering algorithms multiple times with varying parameters and subsequently choose the most promising result. Several methods for an automated estimation of suitable parameters have been proposed. Similar to the procedure of an analyst, these estimation methods draw on repetitive executions of a clustering algorithm with varying parameters. However, when working with voluminous datasets, each single execution tends to be very time-consuming. Especially in today's Big Data era, such a repetitive execution of a clustering algorithm is not feasible for an efficient exploration. We propose a novel and efficient approach to accelerate estimations for the number of clusters in datasets. Our approach relies on the idea of meta-learning and terminates each execution of the clustering algorithm as soon as an expected qualitative demand is met. We show that this new approach is generally applicable, i.e., it can be used with existing estimation methods. Our comprehensive evaluation reveals that our approach is able to speed up the estimation of the number of clusters by an order of magnitude, while still achieving accurate estimates.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-54&engl=0}
}
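
A minimal reading of the meta-learning idea: learn from past executions how many iterations a clustering run typically needs, then terminate new runs after the predicted budget instead of iterating to full convergence. The meta-features and the linear meta-model below are illustrative assumptions, not the paper's design.

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.linear_model import LinearRegression

def meta_features(X):
    # Simple dataset descriptors; real meta-learning uses richer features.
    return [len(X), X.shape[1], float(X.var())]

# "Past observations": datasets whose iterations-to-convergence are known.
history_X, history_y = [], []
for seed in range(5):
    X, _ = make_blobs(n_samples=1000, centers=5, random_state=seed)
    km = KMeans(n_clusters=5, n_init=1, random_state=seed).fit(X)
    history_X.append(meta_features(X))
    history_y.append(km.n_iter_)

meta_model = LinearRegression().fit(history_X, history_y)

# New dataset: stop after the predicted iteration budget.
X_new, _ = make_blobs(n_samples=1000, centers=5, random_state=99)
budget = max(1, int(meta_model.predict([meta_features(X_new)])[0]))
km_new = KMeans(n_clusters=5, n_init=1, max_iter=budget, random_state=0).fit(X_new)
print(budget, km_new.n_iter_)
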
@inproceedings {INPROC-2020-53,
   author = {Manuel Fritz and Michael Behringer and Holger Schwarz},
   title = {{LOG-Means: Efficiently Estimating the Number of Clusters in Large Datasets}},
   booktitle = {Proceedings of the 46th International Conference on Very Large Databases (VLDB)},
   editor = {Magdalena Balazinska and Xiaofang Zhou},
   publisher = {ACM Digital Library},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Proceedings of the VLDB Endowment},
   volume = {13 (12)},
   pages = {2118--2131},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2020},
   issn = {2150-8097},
   doi = {10.14778/3407790.3407813},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Clustering is a fundamental primitive in manifold applications. In order to achieve valuable results, parameters of the clustering algorithm, e.g., the number of clusters, have to be set appropriately, which is a tremendous pitfall. To this end, analysts rely on their domain knowledge in order to define parameter search spaces. While experienced analysts may be able to define a small search space, especially novice analysts often define rather large search spaces due to the lack of in-depth domain knowledge. These search spaces can be explored in different ways by estimation methods for the number of clusters. In the worst case, estimation methods perform an exhaustive search in the given search space, which leads to infeasible runtimes for large datasets and large search spaces. We propose LOG-Means, which is able to overcome these issues of existing methods. We show that LOG-Means provides estimates in sublinear time regarding the defined search space, thus being a strong fit for large datasets and large search spaces. In our comprehensive evaluation on an Apache Spark cluster, we compare LOG-Means to 13 existing estimation methods. The evaluation shows that LOG-Means significantly outperforms these methods in terms of runtime and accuracy. To the best of our knowledge, this is the most systematic comparison on large datasets and search spaces as of today.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-53&engl=0}
}
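
The gist of exploring a parameter search space in sublinear time can be sketched as follows: probe exponentially spaced values of k, then refine only the most promising interval. This is an illustrative heuristic in the spirit of the abstract, not the published LOG-Means algorithm.

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=1000, centers=7, random_state=1)

def sse(k):
    return KMeans(n_clusters=k, n_init=3, random_state=1).fit(X).inertia_

def estimate_k(k_max):
    ks = [2]
    while ks[-1] * 2 <= k_max:          # probe k = 2, 4, 8, ... only
        ks.append(ks[-1] * 2)
    scores = {k: sse(k) for k in ks}
    # Refine inside the interval with the largest relative SSE drop.
    _, lo, hi = max((scores[a] / scores[b], a, b) for a, b in zip(ks, ks[1:]))
    for k in range(lo + 1, hi):
        scores[k] = sse(k)
    # Elbow heuristic over all evaluated k: largest drop to the next k.
    cand = sorted(scores)
    return max((scores[a] / scores[b], b) for a, b in zip(cand, cand[1:]))[1]

print(estimate_k(32))
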
@inproceedings {INPROC-2020-45,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{HANDLE - A Generic Metadata Model for Data Lakes}},
   booktitle = {Big Data Analytics and Knowledge Discovery},
   editor = {Min Song and Il-Yeol Song and Gabriele Kotsis and A Min Tjoa and Ismail Khalil},
   publisher = {Springer Nature Switzerland AG},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Computer Science},
   volume = {12393},
   pages = {73--88},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2020},
   doi = {10.1007/978-3-030-59065-9_7},
   keywords = {Metadata management; Metadata model; Data lake},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2020-45/INPROC-2020-45.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The substantial increase in generated data induced the development of new concepts such as the data lake. A data lake is a large storage repository designed to enable flexible extraction of the data's value. A key aspect of exploiting data value in data lakes is the collection and management of metadata. To store and handle the metadata, a generic metadata model is required that can reflect metadata of any potential metadata management use case, e.g., data versioning or data lineage. However, an evaluation of existent metadata models yields that none so far are sufficiently generic. In this work, we present HANDLE, a generic metadata model for data lakes, which supports the flexible integration of metadata, data lake zones, metadata on various granular levels, and any metadata categorization. With these capabilities HANDLE enables comprehensive metadata management in data lakes. We show HANDLE's feasibility through the application to an exemplary access-use-case and a prototypical implementation. A comparison with existent models yields that HANDLE can reflect the same information and provides additional capabilities needed for metadata management in data lakes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-45&engl=0}
}
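
As a rough illustration of what such a generic metadata model covers, the sketch below encodes metadata attached to data elements at arbitrary granularity, within zones, under arbitrary categorizations. Class and field names are our reading of the abstract, not the HANDLE specification.

from __future__ import annotations
from dataclasses import dataclass, field

@dataclass
class DataElement:
    name: str
    granularity: str          # e.g. "attribute", "table", "dataset"
    zone: str | None = None   # data lake zone the element lives in

@dataclass
class Metadata:
    key: str
    value: str
    categories: set[str] = field(default_factory=set)  # any categorization

@dataclass
class MetadataEntry:
    element: DataElement
    metadata: list[Metadata] = field(default_factory=list)

# Example: lineage and usage metadata on table granularity in the raw zone.
orders = DataElement("orders", granularity="table", zone="raw")
entry = MetadataEntry(orders, [
    Metadata("source", "erp_export", categories={"lineage"}),
    Metadata("last_access", "2020-09-14", categories={"usage"}),
])
print([m.key for m in entry.metadata if "lineage" in m.categories])
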
@inproceedings {INPROC-2019-20,
   author = {Manuel Fritz and Holger Schwarz},
   title = {{Initializing k-Means Efficiently: Benefits for Exploratory Cluster Analysis}},
   booktitle = {On the Move to Meaningful Internet Systems: OTM 2019 Conferences},
   editor = {Herv{\'e} Panetto and Christophe Debruyne and Martin Hepp and Dave Lewis and Claudio Agostino Ardagna and Robert Meersman},
   publisher = {Springer Nature Switzerland AG},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Computer Science (LNCS)},
   volume = {11877},
   pages = {146--163},
   type = {Konferenz-Beitrag},
   month = {Januar},
   year = {2019},
   isbn = {978-3-030-33245-7},
   doi = {10.1007/978-3-030-33246-4_9},
   keywords = {Exploratory cluster analysis; k-Means; Initialization},
   language = {Englisch},
   cr-category = {E.0 Data General,     H.2.8 Database Applications,     H.3.3 Information Search and Retrieval},
   ee = {https://link.springer.com/chapter/10.1007/978-3-030-33246-4_9},
   contact = {manuel.fritz@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data analysis is a highly exploratory task, where various algorithms with different parameters are executed until a solid result is achieved. This is especially evident for cluster analyses, where the number of clusters must be provided prior to the execution of the clustering algorithm. Since this number is rarely known in advance, the algorithm is typically executed several times with varying parameters. Hence, the duration of the exploratory analysis heavily depends on the runtime of each execution of the clustering algorithm. While previous work shows that the initialization of clustering algorithms is crucial for fast and solid results, it solely focuses on a single execution of the clustering algorithm and thereby neglects previous executions. We propose Delta Initialization as an initialization strategy for k-Means in such an exploratory setting. The core idea of this new algorithm is to exploit the clustering results of previous executions in order to enhance the initialization of subsequent executions. We show that this algorithm is well suited for exploratory cluster analysis as considerable speedups can be achieved while additionally achieving superior clustering results compared to state-of-the-art initialization strategies.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-20&engl=0}
}
@inproceedings {INPROC-2019-15,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz},
   title = {{Modeling Data Lakes with Data Vault: Practical Experiences, Assessment, and Lessons Learned}},
   booktitle = {Proceedings of the 38th Conference on Conceptual Modeling (ER 2019)},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--14},
   type = {Konferenz-Beitrag},
   month = {November},
   year = {2019},
   keywords = {Data Lakes; Data Vault; Data Modeling; Industry Experience; Assessment; Lessons Learned},
   language = {Englisch},
   cr-category = {H.2.1 Database Management Logical Design},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data lakes have become popular to enable organization-wide analytics on heterogeneous data from multiple sources. Data lakes store data in their raw format and are often characterized as schema-free. Nevertheless, it turned out that data still need to be modeled, as neglecting data modeling may lead to issues concerning e.g., quality and integration. In current research literature and industry practice, Data Vault is a popular modeling technique for structured data in data lakes. It promises a flexible, extensible data model that preserves data in their raw format. However, hardly any research or assessment exists on the practical usage of Data Vault for modeling data lakes. In this paper, we assess the Data Vault model's suitability for the data lake context, present lessons learned, and investigate success factors for the use of Data Vault. Our discussion is based on the practical usage of Data Vault in a large, global manufacturer's data lake and the insights gained in real-world analytics projects.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-15&engl=0}
}
@inproceedings {INPROC-2019-14,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz},
   title = {{Leveraging the Data Lake - Current State and Challenges}},
   booktitle = {Proceedings of the 21st International Conference on Big Data Analytics and Knowledge Discovery (DaWaK'19)},
   publisher = {Springer Nature},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--10},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2019},
   keywords = {Data Lakes; State of the Art; Challenges},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems,     H.2.8 Database Applications},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The digital transformation leads to massive amounts of heterogeneous data challenging traditional data warehouse solutions in enterprises. In order to exploit these complex data for competitive advantages, the data lake recently emerged as a concept for more flexible and powerful data analytics. However, existing literature on data lakes is rather vague and incomplete, and the various realization approaches that have been proposed neither cover all aspects of data lakes nor do they provide a comprehensive design and realization strategy. Hence, enterprises face multiple challenges when building data lakes. To address these shortcomings, we investigate existing data lake literature and discuss various design and realization aspects for data lakes, such as governance or data models. Based on these insights, we identify challenges and research gaps concerning (1) data lake architecture, (2) data lake governance, and (3) a comprehensive strategy to realize data lakes. These challenges still need to be addressed to successfully leverage the data lake in practice.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-14&engl=0}
}
@inproceedings {INPROC-2019-10,
   author = {Christian Weber and Pascal Hirmer and Peter Reimann and Holger Schwarz},
   title = {{A New Process Model for the Comprehensive Management of Machine Learning Models}},
   booktitle = {Proceedings of the 21st International Conference on Enterprise Information Systems (ICEIS); Heraklion, Crete, Greece, May 3-5, 2019},
   editor = {Joaquim Filipe and Michal Smialek and Alexander Brodsky and Slimane Hammoudi},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {415--422},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2019},
   isbn = {978-989-758-372-8},
   doi = {10.5220/0007725304150422},
   keywords = {Model Management; Machine Learning; Analytics Process},
   language = {Englisch},
   cr-category = {I.2 Artificial Intelligence},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The management of machine learning models is an extremely challenging task. Hundreds of prototypical models are being built and just a few are mature enough to be deployed into operational enterprise information systems. The lifecycle of a model includes an experimental phase in which a model is planned, built and tested. After that, the model enters the operational phase that includes deploying, using, and retiring it. The experimental phase is well known through established process models like CRISP-DM or KDD. However, these models do not detail the interaction between the experimental and the operational phase of machine learning models. In this paper, we provide a new process model to show the interaction points of the experimental and operational phase of a machine learning model. For each step of our process, we discuss the functions which are relevant to managing machine learning models.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-10&engl=0}
}
@inproceedings {INPROC-2018-14,
   author = {Corinna Giebler and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
   title = {{BRAID - A Hybrid Processing Architecture for Big Data}},
   booktitle = {Proceedings of the 7th International Conference on Data Science, Technology and Applications (DATA 2018)},
   publisher = {INSTICC Press},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--8},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2018},
   keywords = {Big Data; IoT; Batch Processing; Stream Processing; Lambda Architecture; Kappa Architecture},
   language = {Englisch},
   cr-category = {D.2.11 Software Engineering Software Architectures,     H.2.4 Database Management Systems,     H.2.8 Database Applications},
   contact = {Senden Sie eine e-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The Internet of Things is applied in many domains and collects vast amounts of data. This data provides access to a lot of knowledge when analyzed comprehensively. However, advanced analysis techniques such as predictive or prescriptive analytics require access to both history data, i.e., long-term persisted data, and real-time data, as well as a joint view on both types of data. State-of-the-art hybrid processing architectures for big data - namely, the Lambda and the Kappa Architecture - support the processing of history data and real-time data. However, they lack a tight coupling of the two processing modes. That is, the user has to do a lot of work manually in order to enable a comprehensive analysis of the data. For instance, the user has to combine the results of both processing modes or apply knowledge from one processing mode to the other. Therefore, we introduce a novel hybrid processing architecture for big data, called BRAID. BRAID intertwines the processing of history data and real-time data by adding communication channels between the batch engine and the stream engine. This enables comprehensive analyses to be carried out automatically at a reasonable overhead.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2018-14&engl=0}
}
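
A toy rendering of the architectural idea above: batch and stream engine exchange information over explicit channels, so results of one processing mode feed the other automatically. Real engines would replace the toy threads; the channel names and the threshold example are assumptions, not BRAID's implementation.

import queue
import threading

stream_to_batch = queue.Queue()  # channel: stream results -> batch engine
batch_to_stream = queue.Queue()  # channel: batch results  -> stream engine

def stream_engine(events):
    model = {"threshold": 10}                # initial knowledge
    for e in events:
        if not batch_to_stream.empty():
            model = batch_to_stream.get()    # apply refreshed batch model
        if e > model["threshold"]:
            stream_to_batch.put(e)           # forward outlier to history

def batch_engine():
    history = []
    while True:
        try:
            history.append(stream_to_batch.get(timeout=0.2))
        except queue.Empty:
            break
    if history:                              # recompute the model on history
        batch_to_stream.put({"threshold": max(history)})

t = threading.Thread(target=batch_engine)
t.start()
stream_engine([5, 12, 7, 20])
t.join()
print(batch_to_stream.get())                 # model refreshed from streamed data
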
@inproceedings {INPROC-2015-47,
   author = {Oliver Kopp and Michael Falkenthal and Niklas Hartmann and Frank Leymann and Holger Schwarz and Jessica Thomsen},
   title = {{Towards a Cloud-based Platform Architecture for a Decentralized Market Agent}},
   booktitle = {INFORMATIK 2015},
   editor = {Douglas Cunningham and Petra Hofstedt and Klaus Meer and Ingo Schmitt},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   volume = {P-246},
   pages = {69--80},
   type = {Workshop-Beitrag},
   month = {September},
   year = {2015},
   isbn = {978-3-88579-640-4},
   issn = {1617-5468},
   language = {Englisch},
   cr-category = {J.m Computer Applications Miscellaneous},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {Reorganization of power generation, thereby replacing conventional energy sources by innovative renewable energy sources, demands a change in distribution grid structure and operation. The foreseen Decentralized Market Agent is a new role in the energy market sector accomplishing not only trading on energy and operating reserve markets but also regulating flexibilities at the distribution grid level, such as energy storage and decentralized energy generators, and thereby considering system services and securing system stability. This paper presents requirements on an IT system to support this new role. We design an architecture matching these requirements and show how Cloud computing technology can be used to implement the architecture. This enables data concerning the distribution grid to be automatically gathered and processed by dedicated algorithms, aiming to optimize cost-efficient operation and the development of the distribution grid.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2015-47&engl=0}
}
@inproceedings {INPROC-2015-46,
   author = {Jessica Thomsen and Niklas Hartmann and Florian Klumpp and Thomas Erge and Michael Falkenthal and Oliver Kopp and Frank Leymann and Sven Stando and Nino Turek and Christoph Schlenzig and Holger Schwarz},
   title = {{Darstellung des Konzeptes -- DMA Decentralised Market Agent -- zur Bew{\"a}ltigung zuk{\"u}nftiger Herausforderungen in Verteilnetzen}},
   booktitle = {INFORMATIK 2015},
   editor = {Douglas Cunningham and Petra Hofstedt and Klaus Meer and Ingo Schmitt},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   volume = {P-246},
   pages = {53--67},
   type = {Workshop-Beitrag},
   month = {September},
   year = {2015},
   isbn = {978-3-88579-640-4},
   issn = {1617-5468},
   language = {Deutsch},
   cr-category = {J.m Computer Applications Miscellaneous},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {In der vorliegenden Ver{\"o}ffentlichung wird ein Konzept f{\"u}r einen neuen Marktakteur im Strommarkt vorgestellt, der im zuk{\"u}nftigen Smart Grid als Aggregator und Planer fungieren kann. Dieser Decentralised Market Agent – DMA – soll die Informationen aller vorhandenen Erzeugungs- und Speicheranlagen, Lasten und Netzinformationen auf Verteilnetzebene aggregieren sowie mit lokalen Akteuren und an den zentralen M{\"a}rkten agieren um einen kostenoptimalen Betrieb und Ausbau des Systems Verteilnetzes zu realisieren. Zur Handlungsf{\"a}higkeit dieser neuen Marktrolle bedarf es hochaufl{\"o}sender Messungen im Verteilnetz und einer „real-time“ Aufbereitung der Messdaten. Im vorliegenden Paper sollen das Konzept sowie die notwendigen Bausteine zur Erreichung der Handlungsf{\"a}higkeit des DMA vorgestellt sowie die zuk{\"u}nftig geplanten Untersuchungen erl{\"a}utert werden. Die detaillierte Entwicklung des Konzepts sowie weiterf{\"u}hrende Analysen sind Teil des Projektes NEMAR – Netzbewirtschaftung als neue Marktrolle, gef{\"o}rdert durch BMWi im Rahmen der Forschungsinitiative Zukunftsf{\"a}hige Stromnetze.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2015-46&engl=0}
}
@inproceedings {INPROC-2015-34,
   author = {Pascal Hirmer and Matthias Wieland and Holger Schwarz and Bernhard Mitschang and Uwe Breitenb{\"u}cher and Frank Leymann},
   title = {{SitRS - A Situation Recognition Service based on Modeling and Executing Situation Templates}},
   booktitle = {Proceedings of the 9th Symposium and Summer School On Service-Oriented Computing},
   editor = {Johanna Barzen and Rania Khalaf and Frank Leymann and Bernhard Mitschang},
   publisher = {IBM Research Report},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Technical Paper},
   volume = {RC25564},
   pages = {113--127},
   type = {Konferenz-Beitrag},
   month = {Dezember},
   year = {2015},
   keywords = {Situation Recognition, IoT, Context, Integration, Cloud Computing, OSLC},
   language = {Englisch},
   cr-category = {J.6 Computer-Aided Engineering,     H.3.1 Content Analysis and Indexing},
   ee = {http://domino.research.ibm.com/library/cyberdig.nsf/papers/656B934403848E8A85257F1D00695A63},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Today, the Internet of Things has evolved due to an advanced connectivity of physical objects. Furthermore, Cloud Computing gains more and more interest for the provisioning of services. In this paper, we want to further improve the integration of these two areas by providing a cloud-based situation recognition service – SitRS. This service can be used to integrate real world objects – the things – into the internet by deriving their situational state based on sensors. This enables context-aware applications to detect events in a smart environment. SitRS is a basic service enabling a generic and easy implementation of Smart* applications such as SmartFactorys, SmartCities, SmartHomes. This paper introduces an approach containing a method and a system architecture for the realization of such a service. The core steps of the method are: (i) registration of the sensors, (ii) modeling of the situation, and (iii) execution of the situation recognition. Furthermore, a prototypical implementation of SitRS is presented and evaluated via runtime measurements.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2015-34&engl=0}
}
@inproceedings {INPROC-2015-24,
   author = {Matthias Wieland and Holger Schwarz and Uwe Breitenb{\"u}cher and Frank Leymann},
   title = {{Towards Situation-Aware Adaptive Workflows}},
   booktitle = {Proceedings of the 13th Annual IEEE Intl. Conference on Pervasive Computing and Communications Workshops: 11th Workshop on Context and Activity Modeling and Recognition},
   address = {St. Louis, Missouri, USA},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {32--37},
   type = {Workshop-Beitrag},
   month = {M{\"a}rz},
   year = {2015},
   keywords = {situation-awareness; adaptive-workflows; situation recognition; situation-aware workflow system},
   language = {Englisch},
   cr-category = {H.4.1 Office Automation},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {Workflows are an established IT concept to achieve business goals in a reliable and robust manner. However, the dynamic nature of modern information systems, the upcoming Industry 4.0, and the Internet of Things increase the complexity of modeling robust workflows significantly as various kinds of situations, such as the failure of a production system, have to be considered explicitly. Consequently, modeling workflows in a situation-aware manner is a complex challenge that quickly results in big unmanageable workflow models. To overcome these issues, we present an approach that allows workflows to become situation-aware to automatically adapt their behavior according to the situation they are in. The approach is based on aggregated context information, which has been an important research topic in the last decade to capture information about an environment. We introduce a system that derives high-level situations from lower-level context and sensor information. A situation can be used by different situation-aware workflows to adapt to the current situation in their execution environment. SitOPT enables the detection of situations using different situation-recognition systems, exchange of information about detected situations, optimization of the situation recognition, and runtime adaptation and optimization of situation-aware workflows based on the recognized situations.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2015-24&engl=0}
}
@inproceedings {INPROC-2014-76,
   author = {Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{A Pattern Approach to Conquer the Data Complexity in Simulation Workflow Design}},
   booktitle = {Proceedings of OnTheMove Federated Conferences and Workshops (OTM), 22nd International Conference on Cooperative Information Systems (CoopIS 2014)},
   editor = {R. Meersman et al.},
   address = {Amantea, Italy},
   publisher = {Springer Berlin Heidelberg},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {LNCS},
   volume = {8841},
   pages = {21--38},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2014},
   keywords = {Data Provisioning; Data Management Patterns; SIMPL; Simulation Workflow; Simulation Workflow Design; Workflow; Workflow Design},
   language = {Englisch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Scientific workflows may be used to enable the collaborative implementation of scientific applications across various domains. Since each domain has its own requirements and solutions for data handling, such workflows often have to deal with a highly heterogeneous data environment. This results in an increased complexity of workflow design. As scientists typically design their scientific workflows on their own, this complexity hinders them from concentrating on their core issue, namely the experiments, analyses, or simulations they conduct. In this paper, we present a novel approach to a pattern-based abstraction support for the complex data management in simulation workflows that goes beyond related work in similar research areas. A pattern hierarchy with different abstraction levels enables a separation of concerns according to the skills of different persons involved in workflow design. The goal is that scientists are no longer obliged to specify low-level details of data management in their workflows. We discuss the advantages of this approach and show to what extent it reduces the complexity of simulation workflow design. Furthermore, we illustrate how to map patterns onto executable workflows. Based on a prototypical implementation of three real-world simulations, we evaluate our approach according to relevant requirements.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-76&engl=0}
}
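
The mapping of patterns onto executable workflows mentioned in this abstract can be pictured as a rewriting step: a high-level data-management pattern expands into low-level provisioning tasks, so scientists never specify the latter themselves. Pattern and step names below are hypothetical examples, not the vocabulary of the authors' framework.

PATTERN_RULES = {
    "data-transfer": ["extract(source)", "convert(format)", "load(target)"],
    "data-iteration": ["split(input)", "map(simulation)", "merge(results)"],
}

def rewrite(workflow):
    executable = []
    for task in workflow:
        # Expand known patterns into low-level steps; keep concrete tasks as-is.
        executable.extend(PATTERN_RULES.get(task, [task]))
    return executable

# A simulation workflow modeled with one pattern and one concrete task:
print(rewrite(["data-transfer", "run-bone-simulation"]))
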
@inproceedings {INPROC-2014-52,
   author = {Peter Reimann and Tim Waizenegger and Matthias Wieland and Holger Schwarz},
   title = {{Datenmanagement in der Cloud f{\"u}r den Bereich Simulationen und Wissenschaftliches Rechnen}},
   booktitle = {Proceedings des 2. Workshop Data Management in the Cloud auf der 44. Jahrestagung der Gesellschaft f{\"u}r Informatik e.V. (GI)},
   editor = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   address = {Stuttgart, Deutschland},
   publisher = {Lecture Notes in Informatics (LNI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Workshop-Beitrag},
   month = {September},
   year = {2014},
   language = {Deutsch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {F{\"u}r Organisationen, die Simulationen nicht als ihr Kerngesch{\"a}ft verstehen und sie daher eher sporadisch durchf{\"u}hren, lohnt sich der Betrieb einer eigenen Recheninfrastruktur nur selten. Dies betrifft z.B. kleine und mittlere Unternehmen sowie einige wissenschaftliche Institutionen. Besserung k{\"o}nnen {\"o}ffentliche Cloud-Infrastrukturen als Plattform f{\"u}r die Ausf{\"u}hrung von Simulationen verschaffen. Das Datenmanagement in der Cloud ist aber speziell f{\"u}r den Bereich Simulationen noch weitgehend unerforscht. In diesem Beitrag identifizieren wir daher noch offene Fragestellungen bzgl. des Datenmanagements von Simulationen in der Cloud. Dies betrifft vor allem die Datenbereitstellung und inwieweit nutzer- und simulationsspezifische Anforderungen an das Datenmanagement in der Cloud eingehalten werden k{\"o}nnen. Wir untersuchen Technologien, welche sich diesen Fragestellungen widmen, und diskutieren, ob und wie sie in der Cloud sowie f{\"u}r Simulationen einsetzbar sind. Weiterhin skizzieren wir wichtige zuk{\"u}nftige Forschungsthemen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-52&engl=0}
}
@inproceedings {INPROC-2014-51,
   author = {Peter Reimann and Holger Schwarz},
   title = {{Simulation Workflow Design Tailor-Made for Scientists}},
   booktitle = {Proceedings of the 26th International Conference on Scientific and Statistical Database Management},
   address = {Aalborg, Denmark},
   publisher = {ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Demonstration},
   month = {Juni},
   year = {2014},
   keywords = {Data Provisioning; Data Management Patterns; Simulation Workflow; Simulation Workflow Design},
   language = {Englisch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Scientific workflows have to deal with highly heterogeneous data environments. In particular, they have to carry out complex data provisioning tasks that filter and transform heterogeneous input data in such a way that underlying tools or services can ingest them. This results in a high complexity of workflow design. Scientists often want to design their workflows on their own, but usually do not have the necessary skills to cope with this complexity. Therefore, we have developed a pattern-based approach to workflow design, thereby mainly focusing on workflows that realize numeric simulations. This approach removes the burden from scientists to specify low-level details of data provisioning. In this demonstration, we apply a prototype implementation of our approach to various use cases and show how it makes simulation workflow design tailor-made for scientists.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-51&engl=0}
}
@inproceedings {INPROC-2014-50,
   author = {Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Patterns to Alleviate the Design of Scientific Workflows Exemplified by a Bone Simulation}},
   booktitle = {Proceedings of the 26th International Conference on Scientific and Statistical Database Management},
   address = {Aalborg, Denmark},
   publisher = {ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juni},
   year = {2014},
   keywords = {Data Provisioning; Data Management Patterns; Workflow; SIMPL; Simulation Workflow; BPEL; WS-BPEL},
   language = {Englisch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Scientific workflows often have to process huge data sets in a multiplicity of data formats. For that purpose, they typically embed complex data provisioning tasks that transform these heterogeneous data into formats the underlying tools or services can handle. This results in an increased complexity of workflow design. As scientists typically design their scientific workflows on their own, this complexity hinders them from concentrating on their core issue, namely the experiments, analyses, or simulations they conduct. In this paper, we present the core idea of a pattern-based approach to alleviate the design of scientific workflows. This approach is particularly targeted at the needs of scientists. We exemplify and assess the pattern-based design approach by applying it to a complex scientific workflow realizing a real-world simulation of structure changes in bones.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-50&engl=0}
}
@inproceedings {INPROC-2014-49,
   author = {Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{The Deep Data Warehouse. Link-based Integration and Enrichment of Warehouse Data and Unstructured Content}},
   booktitle = {Proceedings of the 18th IEEE International Enterprise Distributed Object Computing Conference (EDOC), 01-05 September, 2014, Ulm, Germany},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2014},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data warehouses are at the core of enterprise IT and enable the efficient storage and analysis of structured data. Besides, unstructured content, e.g., emails and documents, constitutes more than half of the entire enterprise data and contains a lot of implicit knowledge about warehouse entities. Thus, holistic analytics require the integration of structured warehouse data and unstructured content to generate novel insights. These insights can also be used to enrich the integrated data and to create a new basis for further analytics. Existing integration approaches only support a limited range of analytical applications and require the costly adaptation of the warehouse schema. In this paper, we present the Deep Data Warehouse (DeepDWH), a novel type of data warehouse based on the flexible integration and enrichment of warehouse data and unstructured content, addressing the variety challenge of Big Data. It relies on information-rich instance-level links between warehouse elements and content items, which are represented in a graph-oriented structure. Neither adaptations of the existing warehouse nor the design of an overall federated schema are required. We design a conceptual linking model and develop a logical schema for links based on a property graph. As a proof of concept, we present a prototypical implementation of the DeepDWH including a link store based on a graph database.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-49&engl=0}
}
@inproceedings {INPROC-2014-28,
   author = {Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{Prescriptive Analytics for Recommendation-based Business Process Optimization}},
   booktitle = {Proceedings of the 17th International Conference on Business Information Systems (BIS), 22-23 May, 2014, Larnaca, Cyprus},
   editor = {Witold Abramowicz and Angelika Kokkinaki},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Business Information Processing},
   volume = {176},
   pages = {25--37},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2014},
   keywords = {Prescriptive Analytics, Process Optimization, Process Warehouse, Data Mining, Business Intelligence, Decision Support},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Continuously improved business processes are a central success factor for companies. Yet, existing data analytics do not fully exploit the data generated during process execution. Particularly, they miss prescriptive techniques to transform analysis results into improvement actions. In this paper, we present the data-mining-driven concept of recommendation-based business process optimization on top of a holistic process warehouse. It prescriptively generates action recommendations during process execution to avoid a predicted metric deviation. We discuss data mining techniques and data structures for real-time prediction and recommendation generation and present a proof of concept based on a prototypical implementation in manufacturing.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-28&engl=0}
}
@inproceedings {INPROC-2014-10,
   author = {Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{The Manufacturing Knowledge Repository. Consolidating Knowledge to Enable Holistic Process Knowledge Management in Manufacturing}},
   booktitle = {Proceedings of the 16th International Conference on Enterprise Information Systems (ICEIS), 27-30 April, 2014, Lisbon, Portugal},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2014},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration,     J.1 Administration Data Processing},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The manufacturing industry is faced with strong competition, making the companies' knowledge resources and their systematic management a critical success factor. Yet, existing concepts for the management of process knowledge in manufacturing are characterized by major shortcomings. Particularly, they are either exclusively based on structured knowledge, e.g., formal rules, or on unstructured knowledge, such as documents, and they focus on isolated aspects of manufacturing processes. To address these issues, we present the Manufacturing Knowledge Repository, a holistic repository that consolidates structured and unstructured process knowledge to facilitate knowledge management and process optimization in manufacturing. First, we define requirements, especially the types of knowledge to be handled, e.g., data mining models and text documents. Next, we develop a conceptual repository data model associating knowledge items and process components such as machines and process steps. Furthermore, we discuss implementation issues including storage architecture variants and present both an evaluation of the data model and a proof of concept based on a prototypical implementation.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2014-10&engl=0}
}
@inproceedings {INPROC-2013-02,
   author = {Peter Reimann and Holger Schwarz},
   title = {{Datenmanagementpatterns in Simulationsworkflows}},
   booktitle = {Proceedings der 15. GI-Fachtagung Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2013)},
   editor = {Gesellschaft f{\"u}r Informatik (GI)},
   address = {Magdeburg},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {279--293},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2013},
   keywords = {Data Provisioning; Data Management Patterns; Workflow; SIMPL; Simulation Workflow; BPEL; WS-BPEL},
   language = {Deutsch},
   cr-category = {H.2.5 Heterogeneous Databases,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Simulation workflows often have to process large amounts of data that are available in a multitude of proprietary formats. For these data to be processed by the programs and services embedded in a workflow, they have to be transformed into suitable formats. This increases the complexity of workflow modeling, which is typically carried out by the scientists themselves, leaving them less time to concentrate on the core of the actual simulation. To remedy this deficiency, we propose an approach that allows the data provisioning activities in simulation processes to be modeled in an abstract way. Scientists should not have to describe implementation details, but only the core aspects of data provisioning in the form of patterns. The specification of the patterns should be expressed as far as possible in the language of the mathematical simulation models that scientists are familiar with. An extension of the workflow system automatically maps the patterns onto executable workflow fragments that implement the data provisioning. All this reduces the complexity of modeling simulation workflows and increases the productivity of the scientists.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2013-02&engl=0}
}
@inproceedings {INPROC-2012-14,
   author = {Christoph Gr{\"o}ger and Florian Niedermann and Holger Schwarz and Bernhard Mitschang},
   title = {{Supporting Manufacturing Design by Analytics. Continuous Collaborative Process Improvement enabled by the Advanced Manufacturing Analytics Platform}},
   booktitle = {Proceedings of the 2012 16th IEEE International Conference on Computer Supported Cooperative Work in Design (CSCWD), May 23-25, 2012, Wuhan, China},
   editor = {Liang Gao and Weiming Shen and Jean-Paul Barth{\`e}s and Junzhou Luo and Jianming Yong and Wenfeng Li and Weidong Li},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {793--799},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2012},
   isbn = {978-1-4673-1210-3},
   keywords = {Analytics; Data Mining; Process Management; Manufacturing; Process Optimization},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications,     J.1 Administration Data Processing},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The manufacturing industry is faced with global competition, making efficient, effective and continuously improved manufacturing processes a critical success factor. Yet, media discontinuities, the use of isolated analysis methods on local data sets as well as missing means for sharing analysis results cause a collaborative gap in Manufacturing Process Management that prohibits continuous process improvement. To address this challenge, this paper proposes the Advanced Manufacturing Analytics (AdMA) Platform that bridges the gap by integrating operational and process manufacturing data, defining a repository for analysis results and providing indication-based and pattern-based optimization techniques. Both the conceptual architecture underlying the platform and its current implementation are presented in this paper.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2012-14&engl=0}
}
@inproceedings {INPROC-2011-37,
   author = {Sylvia Radesch{\"u}tz and Marko Vrhovnik and Holger Schwarz and Bernhard Mitschang},
   title = {{Exploiting the Symbiotic Aspects of Process and Operational Data for Optimizing Business Processes}},
   booktitle = {Proc. of the 12th IEEE International Conference on Information Reuse and Integration (IRI 2011)},
   address = {Las Vegas, USA},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--6},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2011},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {A profound analysis of all relevant business data in a company is necessary for optimizing business processes effectively. Current analyses typically run either on business process execution data or on operational business data. Correlations among the separate data sets have to be found manually with great effort. However, to achieve a more informative analysis and to fully optimize a company's business, an efficient consolidation of all major data sources is indispensable. Recent matching algorithms are insufficient for this task since they are restricted either to schema or to process matching. We present a new matching framework to combine process data models and operational data models (semi-)automatically for performing such a profound business analysis. We describe this approach and its basic matching rules as well as an experimental study that shows the achieved high recall and precision.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2011-37&engl=0}
}
@inproceedings {INPROC-2011-25,
   author = {Florian Niedermann and Holger Schwarz},
   title = {{Deep Business Optimization: Making Business Process Optimization Theory Work in Practice}},
   booktitle = {Proceedings of the Conference on Business Process Modeling, Development and Support (BPMDS 2011)},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--10},
   type = {Konferenz-Beitrag},
   month = {Juni},
   year = {2011},
   keywords = {Business Process Optimization, Optimization Techniques, Business Process Analytics, Data Mining, Tool Support},
   language = {Englisch},
   cr-category = {H.4.1 Office Automation},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2011-25/INPROC-2011-25.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The success of most of today's businesses is tied to the efficiency and effectiveness of their core processes. This importance has been recognized in research, leading to a wealth of sophisticated process optimization and analysis techniques. Their use in practice is, however, often limited, as both the selection and the application of the appropriate techniques are challenging tasks. Hence, many techniques are not considered, causing potentially significant improvement opportunities to remain unexploited. This paper proposes an approach to addressing this challenge using our deep Business Optimization Platform. By integrating a catalogue of formalized optimization techniques with data analysis and integration capabilities, it assists analysts both with the selection and the application of the most fitting optimization techniques for their specific situation. The paper presents both the concepts underlying this platform and its prototypical implementation.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2011-25&engl=0}
}
@inproceedings {INPROC-2011-24,
   author = {Florian Niedermann and Bernhard Maier and Sylvia Radesch{\"u}tz and Holger Schwarz and Bernhard Mitschang},
   title = {{Automated Process Decision Making based on Integrated Source Data}},
   booktitle = {Proceedings of the 14th International Conference on Business Information Systems (BIS 2011)},
   editor = {Witold Abramowicz},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Business Information Processing},
   pages = {1--10},
   type = {Konferenz-Beitrag},
   month = {Juni},
   year = {2011},
   keywords = {Data Mining, Decision Automation, Data Integration, Business Process Management, Data-driven Processes},
   language = {Englisch},
   cr-category = {H.4.1 Office Automation,     H.2.8 Database Applications,     H.5.2 Information Interfaces and Presentation User Interfaces},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The success of most of today's businesses is tied to the efficiency and effectiveness of their core processes. Yet, two major challenges often prevent optimal processes: First, the analysis techniques applied during the optimization are inadequate and fail to include all relevant data sources. Second, the success depends on the abilities of the individual analysts to spot the right designs amongst a plethora of choices. Our deep Business Optimization Platform addresses these challenges through specialized data integration, analysis and optimization facilities. In this paper, we focus on how it uses formalized process optimization patterns for detecting and implementing process improvements.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2011-24&engl=0}
}
@inproceedings {INPROC-2011-07,
   author = {Peter Reimann and Michael Reiter and Holger Schwarz and Dimka Karastoyanova and Frank Leymann},
   title = {{SIMPL - A Framework for Accessing External Data in Simulation Workflows}},
   booktitle = {Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2011), 14. Fachtagung des GI-Fachbereichs „Datenbanken und Informationssysteme“ (DBIS), Proceedings, 02.-04. M{\"a}rz 2011, Kaiserslautern, Germany},
   editor = {Gesellschaft f{\"u}r Informatik (GI)},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   volume = {180},
   pages = {534--553},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {2011},
   isbn = {978-3-88579-274-1},
   keywords = {Data Provisioning; Workflow; Scientific Workflow; Simulation Workflow; BPEL; WS-BPEL; SIMPL},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Architektur von Anwendungssystemen},
   abstract = {Adequate data management and data provisioning are among the most important topics to cope with the information explosion intrinsically associated with simulation applications. Today, data exchange with and between simulation applications is mainly accomplished in a file-style manner. These files show proprietary formats and have to be transformed according to the specific needs of simulation applications. Lots of effort has to be spent to find appropriate data sources and to specify and implement data transformations. In this paper, we present SIMPL – an extensible framework that provides a generic and consolidated abstraction for data management and data provisioning in simulation workflows. We introduce extensions to workflow languages and show how they are used to model the data provisioning for simulation workflows based on data management patterns. Furthermore, we show how the framework supports a uniform access to arbitrary external data in such workflows. This removes the burden from engineers and scientists to specify low-level details of data management for their simulation applications and thus boosts their productivity.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2011-07&engl=0}
}
@inproceedings {INPROC-2009-29,
   author = {Fabian Kaiser and Holger Schwarz and Mih{\'a}ly Jakob},
   title = {{Using Wikipedia-based conceptual contexts to calculate document similarity}},
   booktitle = {ICDS 2009: Proceedings of the 3rd International Conference on Digital Society},
   address = {Cancun, Mexico},
   publisher = {IEEE Computer Society},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {322--327},
   type = {Konferenz-Beitrag},
   month = {Februar},
   year = {2009},
   language = {Englisch},
   cr-category = {H.3 Information Storage and Retrieval,     H.3.3 Information Search and Retrieval},
   ee = {http://dx.doi.org/10.1109/ICDS.2009.7},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Rating the similarity of two or more text documents is an essential task in information retrieval. For example, document similarity can be used to rank search engine results, cluster documents according to topics etc. A major challenge in calculating document similarity originates from the fact that two documents can have the same topic or even mean the same, while they use different wording to describe the content. A sophisticated algorithm therefore will not directly operate on the texts but will have to find a more abstract representation that captures the texts' meaning. In this paper, we propose a novel approach for calculating the similarity of text documents. It builds on conceptual contexts that are derived from content and structure of the Wikipedia hypertext corpus.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2009-29&engl=0}
}
@inproceedings {INPROC-2008-02,
   author = {Marko Vrhovnik and Holger Schwarz and Sylvia Radesch{\"u}tz and Bernhard Mitschang},
   title = {{An Overview of SQL Support in Workflow Products}},
   booktitle = {Proc. of the 24th International Conference on Data Engineering (ICDE 2008), Canc{\'u}n, M{\'e}xico, April 7-12, 2008},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--8},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2008},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In recent years, data management products as well as workflow products have established themselves as indispensable building blocks for advanced IT systems in almost all application areas. Recently, many vendors have created innovative product extensions that combine service-oriented frameworks with powerful workflow and data management capabilities. In this paper, we discuss several workflow products from different vendors with a specific focus on their SQL support. We provide a comparison based on a set of important data management patterns and illustrate the characteristics of various approaches by means of a running example.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2008-02&engl=0}
}
@inproceedings {INPROC-2008-01,
   author = {Marko Vrhovnik and Holger Schwarz and Stephan Ewen and Oliver Suhre},
   title = {{PGM/F: A Framework for the Optimization of Data Processing in Business Processes}},
   booktitle = {Proc. of the 24th International Conference on Data Engineering (ICDE 2008), Canc{\'u}n, M{\'e}xico, April 7-12, 2008},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--4},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2008},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Business process execution languages like BPEL are broadly adopted by industry to integrate the heterogeneous applications and data stores of an enterprise. Leading vendors provide extensions to BPEL that allow for a tight integration of data processing capabilities into the process logic. Business processes exploiting these capabilities show a remarkable potential for optimization. In this demonstration, we present PGM/F, a framework for the optimization of data processing in such business processes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2008-01&engl=0}
}
@inproceedings {INPROC-2007-66,
   author = {Mih{\'a}ly Jakob and Oliver Schiller and Holger Schwarz and Fabian Kaiser},
   title = {{flashWeb: Graphical Modeling of Web Applications for Data Management}},
   booktitle = {Tutorials, posters, panels and industrial contributions at the 26th International Conference on Conceptual Modeling - ER 2007, Auckland, New Zealand, December 2007. Vol. 83},
   editor = {John Grundy and Sven Hartmann and Alberto H. F. Laender and Leszek Maciaszek and John F. Roddick},
   address = {Auckland, New Zealand},
   publisher = {ACS},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {59--64},
   type = {Konferenz-Beitrag},
   month = {Dezember},
   year = {2007},
   isbn = {978-1-920682-64-4},
   keywords = {Model-driven Web engineering; Web application modeling; Code Generation},
   language = {Englisch},
   cr-category = {D.2.3 Software Engineering Coding Tools and Techniques,     D.2.11 Software Engineering Software Architectures,     H.4 Information Systems Applications,     H.5.4 Hypertext/Hypermedia},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2007-66/INPROC-2007-66.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This paper presents flashWeb, a Computer-Aided Web Engineering (CAWE) tool for the model-driven development of web applications that focus on data management. Present-day web applications, like on-line auction systems or enterprise web portals, require comprehensive data access, data processing and data manipulation capabilities. However, existing web application development approaches treat data management operations as second-class citizens. They integrate data operations into existing models or derive them as a by-product of business processes. We argue that data management is an important part of the application logic, hence we capture operations with an additional Operation Model. We show that the explicit modeling of operations provides many benefits that distinguish our solution from other approaches. We present the flashWeb development process utilizing a graphical notation for the models in use, a CAWE tool that supports the creation of the graphical models and a code generator that creates ready-to-run web applications.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2007-66&engl=0}
}
@inproceedings {INPROC-2007-61,
   author = {Fabian Kaiser and Holger Schwarz and Mih{\'a}ly Jakob},
   title = {{EXPOSE: Searching the Web for Expertise}},
   booktitle = {Proceedings of the 30th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, Amsterdam, The Netherlands, July 23-27, 2007.},
   editor = {Wessel Kraaij and Arjen P. de Vries and Charles L. A. Clarke and Norbert Fuhr and Noriko Kando},
   publisher = {ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--1},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2007},
   isbn = {978-1-59593-597-7},
   keywords = {Expert Finding; Search Engine; Information Retrieval; Web Search; Knowledge Management},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In this demonstration we will present EXPOSE, our solution to finding experts on the web. We show how EXPOSE supports the user in diverse tasks throughout the whole search process and how using EXPOSE can improve the result quality compared to ad-hoc searches with common web search engines.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2007-61&engl=0}
}
@inproceedings {INPROC-2007-29,
   author = {Rodrigo Monteiro and Geraldo Zimbrao and Holger Schwarz and Bernhard Mitschang and Jano Souza},
   title = {{DWFIST: Leveraging Calendar-based Pattern Mining in Data Streams}},
   booktitle = {Proc. of the 9th International Conference on Data Warehousing and Knowledge Discovery (DaWaK 2007) Regensburg, Germany, 3-7 September, 2007},
   publisher = {-},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {438--448},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2007},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Calendar-based pattern mining aims at identifying patterns on specific calendar partitions. Potential calendar partitions are, for example: every Monday, every first working day of each month, every holiday. Providing flexible mining capabilities for calendar-based partitions is especially challenging in a data stream scenario. The calendar partitions of interest are not known a priori and at each point in time only a subset of the detailed data is available. We show how a data warehouse approach can be applied to this problem. The data warehouse that keeps track of frequent itemsets holding on different partitions of the original stream has low storage requirements. Nevertheless, it allows deriving sets of patterns that are complete and precise. This work demonstrates the effectiveness of our approach through a series of experiments.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2007-29&engl=0}
}
@inproceedings {INPROC-2007-28,
   author = {Marko Vrhovnik and Holger Schwarz and Oliver Suhre and Bernhard Mitschang and Volker Markl and Albert Maier and Tobias Kraft},
   title = {{An Approach to Optimize Data Processing in Business Processes}},
   booktitle = {Proc. of the 33rd International Conference on Very Large Data Bases (VLDB 2007), Vienna, Austria, September 23-28, 2007},
   publisher = {-},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--12},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2007},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In order to optimize their revenues and profits, an increasing number of businesses organize their business activities in terms of business processes. Typically, they automate important business tasks by orchestrating a number of applications and data stores. Obviously, the performance of a business process is directly dependent on the efficiency of data access, data processing, and data management. In this paper, we propose a framework for the optimization of data processing in business processes. We introduce a set of rewrite rules that transform a business process in such a way that an improved execution with respect to data management can be achieved without changing the semantics of the original process. These rewrite rules are based on a semi-procedural process graph model that externalizes data dependencies as well as control flow dependencies of a business process. Furthermore, we present a multi-stage control strategy for the optimization process. We illustrate the benefits and opportunities of our approach through a prototype implementation. Our experimental results demonstrate that independent of the underlying database system performance gains of orders of magnitude are achievable by reasoning about data and control in a unified framework.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2007-28&engl=0}
}
@inproceedings {INPROC-2007-105,
   author = {Tobias Kraft and Holger Schwarz and Bernhard Mitschang},
   title = {{A Statistics Propagation Approach to Enable Cost-Based Optimization of Statement Sequences}},
   booktitle = {Proc. of the 11th East European Conference on Advances in Databases and Information Systems (ADBIS 2007), Varna, Bulgaria, September 29 - October 3, 2007},
   publisher = {-},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {267--282},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2007},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2007-105&engl=0}
}
@inproceedings {INPROC-2006-57,
   author = {Christoph Mangold and Holger Schwarz},
   title = {{Documents meet Databases: A System for Intranet Search}},
   booktitle = {13th International Conference on Management of Data (COMAD 2006), Delhi, India, December 14-16, 2006},
   editor = {L. V. S. Lakshmanan and P. Roy and A. K. H. Tung},
   address = {New Delhi},
   publisher = {Tata McGraw-Hill Publishing Company Limited},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {227--230},
   type = {Konferenz-Beitrag},
   month = {Dezember},
   year = {2006},
   isbn = {0-07-063374-6},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2006-57/INPROC-2006-57.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In enterprise intranets, information is encoded in documents and databases. Logically, the information in both worlds is tightly connected; however, on the system level there is usually a large gap. In this paper, we propose a system to retrieve documents in the enterprise intranet. The system is an extension to common text search. It not only considers the content of documents but also exploits the enterprise databases to determine the documents' context.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2006-57&engl=0}
}
@inproceedings {INPROC-2006-56,
   author = {Christoph Mangold and Holger Schwarz and Bernhard Mitschang},
   title = {{Symbiosis in the Intranet: How Document Retrieval Benefits from Database Information}},
   booktitle = {13th International Conference on Management of Data (COMAD 2006), December 14-16, 2006, Delhi, India},
   editor = {L. V. S. Lakshmanan and P. Roy and A. K. H. Tung},
   address = {New Delhi},
   publisher = {Tata McGraw-Hill Publishing Company Limited},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {201--204},
   type = {Konferenz-Beitrag},
   month = {Dezember},
   year = {2006},
   isbn = {0-07-063374-6},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2006-56/INPROC-2006-56.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The enterprise information space is split into two hemispheres. Documents contain unstructured or semistructured information; structured information is stored in databases. As regards the content, both kinds of information are complementary parts. However, enterprise information systems usually focus on only one part. Our approach improves document retrieval in the intranet by exploiting the enterprise's databases. In particular, we exploit database information to describe the context of documents and exploit this context to enhance common full text search. In this paper, we show how to model and compute document context and present results on runtime performance.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2006-56&engl=0}
}
@inproceedings {INPROC-2006-52,
   author = {Christoph Mangold and Holger Schwarz and Bernhard Mitschang},
   title = {{u38: A Framework for Database-Supported Enterprise Document-Retrieval}},
   booktitle = {Proceedings of the Tenth International Database Engineering \& Applications Symposium (IDEAS 2006), Delhi, India, December 11-14, 2006},
   publisher = {IEEE Computer Society},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--8},
   type = {Konferenz-Beitrag},
   month = {Dezember},
   year = {2006},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In enterprises, information is encoded in documents and databases. Logically, the information in both worlds is tightly connected; however, on the system level there is usually a large gap. In this paper, we propose a framework that improves document retrieval by exploiting available enterprise databases. In particular, we use database information to model the context of documents and incorporate this context in our search framework. We present our framework architecture, its components and its major interfaces. The framework can be configured and enhanced at well-defined points and, hence, can easily be customized to other domains. We furthermore evaluate its core components. Our experiments show that the context-aware approach significantly improves the quality of search results.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2006-52&engl=0}
}
@inproceedings {INPROC-2006-49,
   author = {Mih{\'a}ly Jakob and Holger Schwarz and Fabian Kaiser and Bernhard Mitschang},
   title = {{Towards an operation model for generated web applications}},
   booktitle = {Workshop Proceedings of the Sixth International Conference on Web Engineering (MDWE 2006), Palo Alto, California, USA, July 2006},
   editor = {Association for Computing Machinery (ACM)},
   address = {New York},
   publisher = {ACM Press},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2006},
   language = {Englisch},
   cr-category = {D.2.3 Software Engineering Coding Tools and Techniques,     D.2.11 Software Engineering Software Architectures,     H.4 Information Systems Applications,     H.5.4 Hypertext/Hypermedia},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This paper describes a new approach for the development of data-intensive web applications that depend on non-trivial data manipulation. E-Commerce web sites, on-line auction systems and large enterprise web portals fall into this category as they require comprehensive data access, data processing and data manipulation capabilities. However, existing methodologies mainly concentrate on modeling content, navigation and presentation aspects of read-only web sites. Approaches that consider modeling data operations incorporate them into existing models resulting in a less clear design. We argue that existing models are not sufficient to express complex operations that access or modify web application content. Therefore, we propose an additional Operation Model defining operations for data-intensive web applications. We also propose the utilization of a web application generator to create an Operation Layer based on this Operation Model.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2006-49&engl=0}
}
@inproceedings {INPROC-2006-48,
   author = {Mih{\'a}ly Jakob and Holger Schwarz and Fabian Kaiser and Bernhard Mitschang},
   title = {{Modeling and Generating Application Logic for Data-Intensive Web Applications}},
   booktitle = {Proceedings of the 6th International Conference on Web Engineering (ICWE 2006), Palo Alto, California, USA, July 2006},
   editor = {Association for Computing Machinery (ACM)},
   address = {New York},
   publisher = {ACM Press},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {77--84},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2006},
   language = {Englisch},
   cr-category = {D.2.3 Software Engineering Coding Tools and Techniques,     D.2.11 Software Engineering Software Architectures,     H.4 Information Systems Applications,     H.5.4 Hypertext/Hypermedia},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This paper presents a new approach for the development of data-intensive web applications that depend on sophisticated application logic. E-Commerce web sites, on-line auction systems and large enterprise web portals fall into this category as they require comprehensive data access, data processing and data manipulation capabilities. However, existing methodologies mainly concentrate on modeling content, navigation and presentation aspects of read-only web sites. In our opinion these models are not sufficient to express complex operations that access or modify web application content. Therefore, we propose an additional Operation Model defining the application logic of a web application. We show that based on this model a significant part of a web application's Operation Layer can be generated, still allowing the manual implementation of arbitrary additional functionality. We evaluate our approach and present experimental results based on a large example application for the area of innovation management.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2006-48&engl=0}
}
@inproceedings {INPROC-2006-14,
   author = {Fabian Kaiser and Holger Schwarz and Mih{\'a}ly Jakob},
   title = {{Finding Experts on the Web}},
   booktitle = {Proceedings of the Second International Conference on Web Information Systems and Technologies, Set{\'u}bal, Portugal, April 11-13, 2006},
   editor = {Jos{\'e} Cordeiro and Vitor Pedrosa and Bruno Encarnacao and Joaquim Filipe},
   publisher = {INSTICC},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {363--368},
   type = {Konferenz-Beitrag},
   month = {April},
   year = {2006},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In this paper, we present an integrated approach to finding experts for arbitrary user-defined topics on the World Wide Web. We discuss the special challenges that come along with this issue and why solely applying standard techniques and standard tools like Web search engines is not suitable. We point out the necessity for a dedicated expert search engine based on a Focused Crawler. The main contribution of our work is an approach to integrate standard Web search engines into the process of searching for experts to utilize the search engines' knowledge about content and structure of the Web.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2006-14&engl=0}
}
@inproceedings {INPROC-2005-57,
   author = {Christoph Mangold and Holger Schwarz and Bernhard Mitschang},
   title = {{Improving Intranet Search Engines Using Context Information from Databases}},
   booktitle = {Proceedings of the 14th ACM International Conference on Information and Knowledge Management (CIKM 2005), Bremen, Germany, October 31 - November 5, 2005},
   editor = {A. Chowdhury and N. Fuhr and M. Ronthaler and H.-J. Schek and W. Teiken},
   publisher = {ACM Press},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {349--350},
   type = {Konferenz-Beitrag},
   month = {Oktober},
   year = {2005},
   isbn = {1-59593-140-6},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2005-57/INPROC-2005-57.pdf},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Information in enterprises comes in documents and databases. From a semantic viewpoint, both kinds of information are usually tightly connected. In this paper, we propose to enhance common search engines with contextual information retrieved from databases. We establish system requirements and anecdotally demonstrate how documents and database information can be represented as the nodes of a graph. Then, we give an example of how we exploit this graph information for document retrieval.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2005-57&engl=0}
}
@inproceedings {INPROC-2005-36,
   author = {Mih{\'a}ly Jakob and Fabian Kaiser and Holger Schwarz},
   title = {{SEMAFOR: A Framework for an Extensible Scenario Management System}},
   booktitle = {Proc. of the IEEE International Engineering Management Conference (IEMC 2005), St. John's, Newfoundland, September 11-14, 2005},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--5},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2005},
   language = {Englisch},
   cr-category = {H.3 Information Storage and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The development of successful new products and services in highly dynamic business environments has become an extremely difficult task. Innovation managers have to utilize a considerable amount of enterprise-internal and enterprise-external information to judge the potential of new products, services and technologies. Scenario management is a powerful instrument to face this problem. Scenarios represent an intuitive concept to comprehend complex present-day and future situations. The scenario technique is a method for building such scenarios. Assessments of future business environments relying on structured scenarios enable innovation managers to target lucrative market segments and to select promising product ideas. However, diverse enterprise-internal and enterprise-external resources have to be utilized to assist the scenario development process. Last but not least, existing methods are often extremely time-consuming and existing tools for scenario development fail to provide a comprehensive solution as they are limited to certain steps in fixed workflows. In this paper, we propose a modular web-based framework for the flexible and efficient development and handling of scenarios. Key aspects of our framework are module-based enterprise-specific workflows, the integration of distributed human resources into the scenario development process and diverse interfaces to enterprise-internal and enterprise-external information sources. The framework is based on self-contained software modules that cover important steps of the scenario management process. This modularity allows the easy supplementation of existing 'scenario technique' methods with newly developed methods that are incorporated into modules and can be combined in a flexible way to fit enterprise-specific requirements.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2005-36&engl=0}
}
@inproceedings {INPROC-2005-17,
   author = {Rodrigo Salvador Monteiro and Geraldo Zimbrao and Holger Schwarz and Bernhard Mitschang and Jano Moreira De Souza},
   title = {{Building the Data Warehouse of Frequent Itemsets in the DWFIST Approach}},
   booktitle = {Proceedings of the 15th International Symposium on Methodologies for Intelligent Systems Saratoga Springs, New York - May 25-28, 2005},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--9},
   type = {Konferenz-Beitrag},
   month = {Mai},
   year = {2005},
   isbn = {3-540-25878-7},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration,     H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Some data mining tasks can produce such great amounts of data that we have to cope with a new knowledge management problem. Frequent itemset mining fits in this category. Different approaches have been proposed to handle or somehow avoid this problem. All of them have problems and limitations. In particular, most of them need the original data during the analysis phase, which is not feasible for data streams. The DWFIST (Data Warehouse of Frequent ItemSets Tactics) approach aims at providing a powerful environment for the analysis of itemsets and derived patterns, such as association rules, without accessing the original data during the analysis phase. This approach is based on a Data Warehouse of Frequent Itemsets. It provides frequent itemsets in a flexible and efficient way as well as a standardized logical view upon which analytical tools can be developed. This paper presents how such a data warehouse can be built.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2005-17&engl=0}
}
@inproceedings {INPROC-2004-30,
   author = {Tobias Kraft and Holger Schwarz},
   title = {{CHICAGO: A Test and Evaluation Environment for Coarse-Grained Optimization}},
   booktitle = {Proceedings of the 30th International Conference on Very Large Data Bases, Toronto, Canada, August 29th - September 3rd, 2004},
   publisher = {Morgan Kaufmann},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1345--1348},
   type = {Konferenz-Beitrag},
   month = {August},
   year = {2004},
   isbn = {0-12-088469-0},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Relational OLAP tools and other database applications generate sequences of SQL statements that are sent to the database server as the result of a single information request issued by a user. Coarse-Grained Optimization is a practical approach for the optimization of such statement sequences based on rewrite rules. In this demonstration, we present the CHICAGO test and evaluation environment, which allows assessing the effectiveness of rewrite rules and control strategies. It includes a lightweight heuristic optimizer that modifies a given statement sequence using a small and variable set of rewrite rules.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2004-30&engl=0}
}
@inproceedings {INPROC-2003-04,
   author = {Tobias Kraft and Holger Schwarz and Ralf Rantzau and Bernhard Mitschang},
   title = {{Coarse-Grained Optimization: Techniques for Rewriting SQL Statement Sequences}},
   booktitle = {Proceedings of 29th International Conference on Very Large Data Bases (VLDB 2003), Berlin, September 9-12, 2003},
   publisher = {Morgan Kaufmann},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Germany},
   pages = {488--499},
   type = {Konferenz-Beitrag},
   month = {September},
   year = {2003},
   isbn = {0-12-722442-4},
   keywords = {SQL; Query Optimization; OLAP},
   language = {Englisch},
   cr-category = {H.2.4 Database Management Systems},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte H{\"o}chstleistungsrechner, Anwendersoftware},
   abstract = {Relational OLAP tools and other database applications generate sequences of SQL statements that are sent to the database server as the result of a single information request provided by a user. Unfortunately, these sequences cannot be processed efficiently by current database systems because they typically optimize and process each statement in isolation. We propose a practical approach for this optimization problem, called ``coarse-grained optimization,'' complementing the conventional query optimization phase. This new approach exploits the fact that statements of a sequence are correlated since they belong to the same information request. A lightweight heuristic optimizer modifies a given statement sequence using a small set of rewrite rules. Since the optimizer is part of a separate system layer, it is independent of but can be tuned to a specific underlying database system. We discuss implementation details and demonstrate that our approach leads to significant performance improvements.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2003-04&engl=0}
}
@inproceedings {INPROC-2001-32,
   author = {Holger Schwarz and Ralf Wagner and Bernhard Mitschang},
   title = {{Improving the Processing of Decision Support Queries: The Case for a DSS Optimizer}},
   booktitle = {Proc. of the 2001 International Database Engineering \& Applications Symposium (IDEAS), July 16-18, 2001},
   editor = {Michel Adiba and Christine Collet and Bipin C. Desai},
   address = {Los Alamitos, Washington, Brussels, Tokyo},
   publisher = {IEEE Computer Society},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Germany},
   pages = {177--186},
   type = {Konferenz-Beitrag},
   month = {Juli},
   year = {2001},
   isbn = {0-7695-1140-6},
   keywords = {Decision Support; OLAP; Data Warehouse},
   language = {Englisch},
   cr-category = {H.4.2 Information Systems Applications Types of Systems},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2001-32/INPROC-2001-32.pdf},
   contact = {holger.schwarz@informatik.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte H{\"o}chstleistungsrechner, Anwendersoftware},
   abstract = {Many decision support applications are built upon data mining and OLAP tools and allow users to answer information requests based on a data warehouse that is managed by a powerful DBMS. In this paper, we focus on tools that generate sequences of SQL statements in order to produce the requested information. Our thorough analysis revealed that many sequences of queries that are generated by commercial tools are not very efficient. An optimized system architecture is suggested for these applications. The main component is a DSS optimizer that accepts previously generated sequences of queries and remodels them according to a set of optimization strategies, before they are executed by the underlying database system. The advantages of this extended architecture are discussed and a couple of appropriate optimization strategies are identified. Experimental results are given, showing that these strategies are appropriate to optimize typical query sequences of an OLAP application.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2001-32&engl=0}
}
@inproceedings {INPROC-1999-01,
   author = {Ralf Rantzau and Holger Schwarz},
   title = {{A Multi-Tier Architecture for High-Performance Data Mining}},
   booktitle = {Proceedings of the Conference Datenbanksysteme in B{\"u}ro, Technik und Wissenschaft (BTW 1999), Freiburg, Germany, March 1999},
   editor = {A. P. Buchmann},
   address = {Berlin, Heidelberg, New York},
   publisher = {Springer},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Germany},
   series = {Informatik aktuell},
   pages = {151--163},
   type = {Konferenz-Beitrag},
   month = {M{\"a}rz},
   year = {1999},
   isbn = {3-540-65606-5},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-1999-01/INPROC-1999-01.ps,     ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-1999-01/INPROC-1999-01.pdf},
   contact = {rrantzau@acm.org},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte H{\"o}chstleistungsrechner, Anwendersoftware},
   abstract = {Data mining has been recognised as an essential element of decision support, which has increasingly become a focus of the database industry. As for all computationally expensive data analysis applications, for example Online Analytical Processing (OLAP), performance is a key factor for usefulness and acceptance in business. In the course of the CRITIKAL project (Client-Server Rule Induction Technology for Industrial Knowledge Acquisition from Large Databases), which is funded by the European Commission, several kinds of architectures for data mining were evaluated with a strong focus on high performance. Specifically, the data mining techniques association rule discovery and decision tree induction were implemented in a prototype. We present the architecture developed by the CRITIKAL consortium and compare it to alternative architectures.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-1999-01&engl=0}
}
@article {ART-2023-07,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
   title = {{Introducing the enterprise data marketplace: a platform for democratizing company data}},
   journal = {Journal of Big Data},
   publisher = {Springer Nature},
   volume = {10},
   pages = {1--38},
   type = {Artikel in Zeitschrift},
   month = {November},
   year = {2023},
   issn = {2196-1115},
   doi = {10.1186/s40537-023-00843-z},
   keywords = {Data Catalog; Data Democratization; Data Market; Data Sharing; Enterprise Data Marketplace; Metadata Management},
   language = {Englisch},
   cr-category = {E.m Data Miscellaneous,     H.3.7 Digital Libraries,     H.4.m Information Systems Applications Miscellaneous},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In this big data era, multitudes of data are generated and collected which contain the potential to gain new insights, e.g., for enhancing business models. To leverage this potential through, e.g., data science and analytics projects, the data must be made available. In this context, data marketplaces are used as platforms to facilitate the exchange and thus, the provisioning of data and data-related services. Data marketplaces are mainly studied for the exchange of data between organizations, i.e., as external data marketplaces. Yet, the data collected within a company also has the potential to provide valuable insights for this same company, for instance to optimize business processes. Studies indicate, however, that a significant amount of data within companies remains unused. In this sense, it is proposed to employ an Enterprise Data Marketplace, a platform to democratize data within a company among its employees. Specifics of the Enterprise Data Marketplace, how it can be implemented or how it makes data available throughout a variety of systems like data lakes have not been investigated in the literature so far. Therefore, we present the characteristics and requirements of this kind of marketplace. We also distinguish it from other tools like data catalogs, provide a platform architecture and highlight how it integrates with the company's system landscape. The presented concepts are demonstrated through an Enterprise Data Marketplace prototype and an experiment reveals that this marketplace significantly improves the data consumer workflows in terms of efficiency and complexity. This paper is based on several interdisciplinary works combining comprehensive research with practical experience from an industrial perspective. We therefore present the Enterprise Data Marketplace as a distinct marketplace type and provide the basis for establishing it within a company.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-07&engl=0}
}
@article {ART-2023-03,
   author = {Dennis Treder-Tschechlov and Manuel Fritz and Holger Schwarz and Bernhard Mitschang},
   title = {{ML2DAC: Meta-Learning to Democratize AutoML for Clustering Analysis}},
   journal = {Proceedings of the ACM on Management of Data (SIGMOD)},
   publisher = {Association for Computing Machinery (ACM)},
   volume = {1},
   number = {2},
   pages = {1--26},
   type = {Artikel in Zeitschrift},
   month = {Juni},
   year = {2023},
   doi = {10.1145/3589289},
   language = {Englisch},
   cr-category = {I.5.3 Pattern Recognition Clustering},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Analysts often struggle with the combined algorithm selection and hyperparameter optimization problem, a.k.a. CASH problem in literature. Typically, they execute several algorithms with varying hyperparameter settings to find configurations that show valuable results. Efficiently finding these configurations is a major challenge. In clustering analyses, analysts face the additional challenge of selecting a cluster validity index that allows them to evaluate clustering results in a purely unsupervised fashion. Many different cluster validity indices exist and each one has its benefits depending on the dataset characteristics. While experienced analysts might address these challenges using their domain knowledge and experience, especially novice analysts struggle with them. In this paper, we propose a new meta-learning approach to address these challenges. Our approach uses knowledge from past clustering evaluations to apply strategies that experienced analysts would exploit. In particular, we use meta-learning to (a) select a suitable clustering validity index, (b) efficiently select well-performing clustering algorithms and hyperparameter configurations, and (c) reduce the search space to suitable clustering algorithms. In the evaluation, we show that our approach significantly outperforms state-of-the-art approaches regarding accuracy and runtime.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-03&engl=0}
}
@article {ART-2023-02,
   author = {Vitali Hirsch and Peter Reimann and Dennis Treder-Tschechlov and Holger Schwarz and Bernhard Mitschang},
   title = {{Exploiting Domain Knowledge to address Class Imbalance and a Heterogeneous Feature Space in Multi-Class Classification}},
   journal = {International Journal on Very Large Data Bases (VLDB-Journal)},
   publisher = {Springer},
   type = {Artikel in Zeitschrift},
   month = {Februar},
   year = {2023},
   keywords = {Classification; Domain knowledge; Multi-class Imbalance; Heterogeneous feature space},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Real-world data of multi-class classification tasks often show complex data characteristics that lead to a reduced classification performance. Major analytical challenges are a high degree of multi-class imbalance within data and a heterogeneous feature space, which increases the number and complexity of class patterns. Existing solutions to classification or data pre-processing only address one of these two challenges in isolation. We propose a novel classification approach that explicitly addresses both challenges of multi-class imbalance and heterogeneous feature space together. As main contribution, this approach exploits domain knowledge in terms of a taxonomy to systematically prepare the training data. Based on an experimental evaluation on both real-world data and several synthetically generated data sets, we show that our approach outperforms any other classification technique in terms of accuracy. Furthermore, it entails considerable practical benefits in real-world use cases, e.g., it reduces rework required in the area of product quality control.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-02&engl=0}
}
@article {ART-2021-05,
   author = {Manuel Fritz and Michael Behringer and Dennis Tschechlov and Holger Schwarz},
   title = {{Efficient exploratory clustering analyses in large-scale exploration processes}},
   journal = {The VLDB Journal},
   editor = {Georgia Koutrika and Ren{\'e}e J. Miller and Kyuseok Shim},
   address = {Berlin, Heidelberg},
   publisher = {Springer Berlin Heidelberg},
   pages = {1--22},
   type = {Artikel in Zeitschrift},
   month = {November},
   year = {2021},
   doi = {10.1007/s00778-021-00716-y},
   issn = {1066-8888},
   keywords = {Exploratory clustering analysis; Exploration; Clustering; Centroid-based clustering},
   language = {Englisch},
   cr-category = {H.3.3 Information Search and Retrieval},
   contact = {Senden Sie eine E-Mail an manuel.fritz@ipvs.uni-stuttgart.de.},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Clustering is a fundamental primitive in manifold applications. In order to achieve valuable results in exploratory clustering analyses, parameters of the clustering algorithm have to be set appropriately, which is a tremendous pitfall. We observe multiple challenges for large-scale exploration processes. On the one hand, they require specific methods to efficiently explore large parameter search spaces. On the other hand, they often exhibit large runtimes, in particular when large datasets are analyzed using clustering algorithms with super-polynomial runtimes, which repeatedly need to be executed within exploratory clustering analyses. We address these challenges as follows: First, we present LOG-Means and show that it provides estimates for the number of clusters in sublinear time regarding the defined search space, i.e., provably requiring fewer executions of a clustering algorithm than existing methods. Second, we demonstrate how to exploit fundamental characteristics of exploratory clustering analyses in order to significantly accelerate the (repetitive) execution of clustering algorithms on large datasets. Third, we show how these challenges can be tackled at the same time. To the best of our knowledge, this is the first work which simultaneously addresses the above-mentioned challenges. In our comprehensive evaluation, we unveil that our proposed methods significantly outperform state-of-the-art methods, thus especially supporting novice analysts for exploratory clustering analyses in large-scale exploration processes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2021-05&engl=0}
}
@article {ART-2021-03,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{Modeling metadata in data lakes—A generic model}},
   journal = {Data \& Knowledge Engineering},
   publisher = {Elsevier},
   volume = {136},
   pages = {1--17},
   type = {Artikel in Zeitschrift},
   month = {November},
   year = {2021},
   issn = {0169-023X},
   doi = {10.1016/j.datak.2021.101931},
   keywords = {Metadata management; Metadata model; Data lake; Data management; Data lake zones; Metadata classification},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data contains important knowledge and has the potential to provide new insights. Due to new technological developments such as the Internet of Things, data is generated in increasing volumes. In order to deal with these data volumes and extract the data's value new concepts such as the data lake were created. The data lake is a data management platform designed to handle data at scale for analytical purposes. To prevent a data lake from becoming inoperable and turning into a data swamp, metadata management is needed. To store and handle metadata, a generic metadata model is required that can reflect metadata of any potential metadata management use case, e.g., data versioning or data lineage. However, an evaluation of existent metadata models yields that none so far are sufficiently generic, as their design basis is not suited to this purpose. In this work, we use a different design approach to build HANDLE, a generic metadata model for data lakes. The new metadata model supports the acquisition of metadata on varying granular levels, any metadata categorization, including the acquisition of both metadata that belongs to a specific data element and metadata that applies to a broader range of data. HANDLE supports the flexible integration of metadata and can reflect the same metadata in various ways according to the intended utilization. Furthermore, it is created for data lakes and therefore also supports data lake characteristics like data lake zones. With these capabilities HANDLE enables comprehensive metadata management in data lakes. HANDLE's feasibility is shown through the application to an exemplary access-use-case and a prototypical implementation. By comparing HANDLE with existing models we demonstrate that it can provide the same information as the other models as well as adding further capabilities needed for metadata management in data lakes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2021-03&engl=0}
}
@article {ART-2020-04,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Lakes auf den Grund gegangen: Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
   journal = {Datenbank-Spektrum},
   publisher = {Springer},
   volume = {20},
   number = {1},
   pages = {57--69},
   type = {Artikel in Zeitschrift},
   month = {Januar},
   year = {2020},
   doi = {10.1007/s13222-020-00332-0},
   keywords = {Data Lake; Analytics; Stand der Technik; Herausforderungen; Praxisbeispiel},
   language = {Deutsch},
   cr-category = {A.1 General Literature, Introductory and Survey,     E.0 Data General},
   ee = {https://rdcu.be/b0WM8},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Unternehmen stehen zunehmend vor der Herausforderung, gro{\ss}e, heterogene Daten zu verwalten und den darin enthaltenen Wert zu extrahieren. In den letzten Jahren kam darum der Data Lake als neuartiges Konzept auf, um diese komplexen Daten zu verwalten und zu nutzen. Wollen Unternehmen allerdings einen solchen Data Lake praktisch umsetzen, so sto{\ss}en sie auf vielf{\"a}ltige Herausforderungen, wie beispielsweise Widerspr{\"u}che in der Definition oder unscharfe und fehlende Konzepte. In diesem Beitrag werden konkrete Projekte eines global agierenden Industrieunternehmens genutzt, um bestehende Herausforderungen zu identifizieren und Anforderungen an Data Lakes herzuleiten. Diese Anforderungen werden mit der verf{\"u}gbaren Literatur zum Thema Data Lake sowie mit existierenden Ans{\"a}tzen aus der Forschung abgeglichen. Die Gegen{\"u}berstellung zeigt, dass f{\"u}nf gro{\ss}e Forschungsl{\"u}cken bestehen: 1. Unklare Datenmodellierungsmethoden, 2. Fehlende Data-Lake-Referenzarchitektur, 3. Unvollst{\"a}ndiges Metadatenmanagementkonzept, 4. Unvollst{\"a}ndiges Data-Lake-Governance-Konzept, 5. Fehlende ganzheitliche Realisierungsstrategie.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-04&engl=0}
}
@article {ART-2019-11,
   author = {Manuel Fritz and Osama Muazzen and Michael Behringer and Holger Schwarz},
   title = {{ASAP-DM: a framework for automatic selection of analytic platforms for data mining}},
   journal = {Software-Intensive Cyber-Physical Systems},
   publisher = {Springer Berlin Heidelberg},
   pages = {1--13},
   type = {Artikel in Zeitschrift},
   month = {August},
   year = {2019},
   issn = {2524-8510 (print), 2524-8529 (electronic)},
   doi = {10.1007/s00450-019-00408-7},
   keywords = {Data mining; Analytic platform; Platform selection},
   language = {Englisch},
   cr-category = {E.0 Data General,     H.2.8 Database Applications,     H.3.3 Information Search and Retrieval},
   contact = {manuel.fritz@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The plethora of analytic platforms escalates the difficulty of selecting the most appropriate analytic platform that fits the needed data mining task, the dataset as well as additional user-defined criteria. Especially analysts, who are rather focused on the analytics domain, experience difficulties in keeping up with the latest developments. In this work, we introduce the ASAP-DM framework, which enables analysts to seamlessly use several platforms, whereas programmers can easily add several platforms to the framework. Furthermore, we investigate how to predict a platform based on specific criteria, such as lowest runtime or resource consumption during the execution of a data mining task. We formulate this task as an optimization problem, which can be solved by today's classification algorithms. We evaluate the proposed framework on several analytic platforms such as Spark, Mahout, and WEKA along with several data mining algorithms for classification, clustering, and association rule discovery. Our experiments unveil that the automatic selection process can save up to 99.71\% of the execution time due to automatically choosing a faster platform.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-11&engl=0}
}
@article {ART-2019-07,
   author = {Manuel Fritz and Michael Behringer and Holger Schwarz},
   title = {{Quality-driven early stopping for explorative cluster analysis for big data}},
   journal = {Software-Intensive Cyber-Physical Systems},
   publisher = {Springer Berlin Heidelberg},
   pages = {1--12},
   type = {Artikel in Zeitschrift},
   month = {Februar},
   year = {2019},
   issn = {2524-8510 (print), 2524-8529 (electronic)},
   doi = {10.1007/s00450-019-00401-0},
   keywords = {Clustering; Big Data; Early Stop; Convergence; Regression},
   language = {Englisch},
   cr-category = {E.0 Data General,     H.2.8 Database Applications,     H.3.3 Information Search and Retrieval},
   ee = {https://link.springer.com/article/10.1007/s00450-019-00401-0},
   contact = {manuel.fritz@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data analysis has become a critical success factor for companies in all areas. Hence, it is necessary to quickly gain knowledge from available datasets, which is becoming especially challenging in times of big data. Typical data mining tasks like cluster analysis are very time consuming even if they run in highly parallel environments like Spark clusters. To support data scientists in explorative data analysis processes, we need techniques to make data mining tasks even more efficient. To this end, we introduce a novel approach to stop clustering algorithms as early as possible while still achieving an adequate quality of the detected clusters. Our approach exploits the iterative nature of many cluster algorithms and uses a metric to decide after which iteration the mining task should stop. We present experimental results based on a Spark cluster using multiple huge datasets. The experiments unveil that our approach is able to accelerate the clustering up to a factor of more than 800 by obliterating many iterations which provide only little gain in quality. This way, we are able to find a good balance between the time required for data analysis and quality of the analysis results.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-07&engl=0}
}
@article {ART-2016-12,
   author = {Pascal Hirmer and Matthias Wieland and Holger Schwarz and Bernhard Mitschang and Uwe Breitenb{\"u}cher and Santiago G{\'o}mez S{\'a}ez and Frank Leymann},
   title = {{Situation recognition and handling based on executing situation templates and situation-aware workflows}},
   journal = {Computing},
   publisher = {Springer},
   pages = {1--19},
   type = {Artikel in Zeitschrift},
   month = {Oktober},
   year = {2016},
   doi = {10.1007/s00607-016-0522-9},
   keywords = {Situation Recognition; IoT; Context; Integration; Cloud Computing; Workflows; Middleware},
   language = {Englisch},
   cr-category = {J.6 Computer-Aided Engineering,     H.3.1 Content Analysis and Indexing},
   ee = {http://dx.doi.org/10.1007/s00607-016-0522-9},
   contact = {pascal.hirmer@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Today, the Internet of Things has evolved due to an advanced interconnectivity of hardware devices equipped with sensors and actuators. Such connected environments are nowadays well-known as smart environments. Famous examples are smart homes, smart cities, and smart factories. Such environments should only be called ``smart'' if they allow monitoring and self-organization. However, this is a great challenge: (1) sensors have to be bound and sensor data have to be efficiently provisioned to enable monitoring of these environments, (2) situations have to be detected based on sensor data, and (3) based on the recognized situations, a reaction has to be triggered to enable self-organization, e.g., through notification delivery or the execution of workflows. In this article, we introduce SitOPT---an approach for situation recognition based on raw sensor data and automated handling of occurring situations through notification delivery or execution of situation-aware workflows. This article is an extended version of the paper ``SitRS - Situation Recognition based on Modeling and Executing Situation Templates'' presented at the 9th Symposium and Summer School of Service-oriented Computing 2015.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2016-12&engl=0}
}
@article {ART-2013-09,
   author = {Sylvia Radesch{\"u}tz and Holger Schwarz and Florian Niedermann},
   title = {{Business impact analysis — a framework for a comprehensive analysis and optimization of business processes}},
   journal = {Computer Science – Research and Development},
   publisher = {Springer},
   pages = {1--18},
   type = {Artikel in Zeitschrift},
   month = {September},
   year = {2013},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The ability to continuously adapt its business processes is crucial for any company to survive in today's dynamic world. In order to accomplish this task, a company needs to profoundly analyze all its business data. This generates the need for data integration and analysis techniques that allow for a comprehensive analysis. A particular challenge when conducting this analysis is the integration of process data generated by workflow engines and operational data that is produced by business applications and stored in data warehouses. Typically, these two types of data are not matched as their acquisition and analysis follows different principles, i.e., a process-oriented view versus a view focusing on business objects. To address this challenge, we introduce a framework that makes it possible to improve business processes based on an integrated view of process data and operational data. We present and evaluate various architectural options for the data warehouse that provides this integrated view based on a specialized federation layer. This integrated view is also reflected in a set of operators that we introduce. We show how these operators ease the definition of analysis queries and how they make it possible to extract hidden optimization patterns using data mining techniques.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2013-09&engl=0}
}
@article {ART-2011-19,
   author = {Bernhard Mitschang and Holger Schwarz},
   title = {{Der Lehrstuhl ”Datenbanken und Informationssysteme” an der Universit{\"a}t Stuttgart stellt sich vor}},
   journal = {Datenbank-Spektrum},
   publisher = {Springer},
   volume = {11},
   number = {3},
   pages = {213--217},
   type = {Artikel in Zeitschrift},
   month = {November},
   year = {2011},
   language = {Deutsch},
   cr-category = {H.2 Database Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {In diesem Beitrag stellen wir den Lehrstuhl f{\"u}r Datenbanken und Informationssysteme der Universit{\"a}t Stuttgart unter der Leitung von Prof. Dr. Bernhard Mitschang vor. Nach einem {\"U}berblick {\"u}ber die Forschungsschwerpunkte des Lehrstuhls gehen wir auf ausgew{\"a}hlte aktuelle Forschungsprojekte ein und erl{\"a}utern die Beteiligung an der Lehre in Bachelor- und Masterstudieng{\"a}ngen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2011-19&engl=0}
}
@article {ART-2011-14,
   author = {Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{Design, Implementation, and Evaluation of a Tight Integration of Database and Workflow Engines}},
   journal = {Journal of Information and Data Management},
   editor = {Alberto H. F. Laender and Mirella M. Moro},
   publisher = {SBC - Brazilian Computer Society},
   volume = {2},
   number = {3},
   pages = {353--368},
   type = {Artikel in Zeitschrift},
   month = {Oktober},
   year = {2011},
   issn = {2178-7107},
   keywords = {Data-Intensive Workflow; Improved Local Data Processing; Scientific Workflow; Simulation Workflow},
   language = {Englisch},
   cr-category = {D.2.11 Software Engineering Software Architectures,     H.2.8 Database Applications,     H.4.1 Office Automation},
   contact = {Peter Reimann Peter.Reimann@ipvs.uni-stuttgart.de},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Accessing and processing huge amounts of heterogeneous and distributed data are some of the major challenges of data-intensive workflows. Traditionally, the descriptions of such workflows focus on their data flow. Nevertheless, control-flow-oriented workflow languages are increasingly adapted to the needs of data-intensive workflows. This provides a common level of abstraction for both data-intensive workflows and classical orchestration workflows, e.g., business workflows, which then enables a comprehensive optimization across all workflows. However, the problem still remains that workflows described in control-flow-oriented languages tend to be less efficient for data-intensive processes compared to specialized data-flow-oriented approaches. In this paper, we propose a new kind of optimization targeted at data-intensive workflows that are described in control-flow-oriented languages. We show how to improve efficiency of such workflows by introducing various techniques that partition the local data processing tasks to be performed during workflow execution in an improved way. These data processing tasks are either assigned to the workflow engine or to the tightly integrated local database engine. We evaluate the effectiveness of these techniques by means of various test scenarios.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2011-14&engl=0}
}
@article {ART-2011-07,
   author = {Holger Schwarz},
   title = {{Generierung des Datenzugriffs in Anwendungsprogrammen: Anwendungsbereiche und Implementierungstechniken}},
   journal = {Datenbank Spektrum},
   address = {Heidelberg},
   publisher = {Springer},
   volume = {11},
   number = {1},
   pages = {5--14},
   type = {Artikel in Zeitschrift},
   month = {April},
   year = {2011},
   language = {Deutsch},
   cr-category = {H.4 Information Systems Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Datenzugriffe auf externe und heterogene Datenbest{\"a}nde sind ein grundlegender Bestandteil von Anwendungsprogrammen in ganz unterschiedlichen Anwendungsbereichen. Vielfach k{\"o}nnen diese Datenzugriffe nicht {\"u}ber statisch eingebettete Anweisungen realisiert werden, sondern m{\"u}ssen dynamisch generiert werden. In diesem Beitrag wird das Spektrum relevanter Anwendungsbereiche vorgestellt. Ausgehend von einzelnen Systembeispielen werden wichtige Aspekte anfragegenerierender Systeme verallgemeinert. Hierzu wird eine Systemklassifikation vorgestellt und die Bedeutung der Klassifikation insbesondere f{\"u}r Optimierungsaspekte erl{\"a}utert. Ferner werden drei grundlegende Implementierungskonzepte f{\"u}r anfragegenerierende Systeme vorgestellt und deren Eignung f{\"u}r einzelne Anwendungsklassen diskutiert.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2011-07&engl=0}
}
@article {ART-2007-08,
   author = {Fabian Kaiser and Mih{\'a}ly Jakob and Sebastian Wiedersheim and Holger Schwarz},
   title = {{Framework-Unterst{\"u}tzung f{\"u}r aufwendige Websuche}},
   journal = {Datenbank-Spektrum},
   publisher = {dpunkt-Verlag},
   volume = {7},
   number = {23},
   pages = {13--20},
   type = {Artikel in Zeitschrift},
   month = {November},
   year = {2007},
   language = {Deutsch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Die Nutzung des WWW als wichtige Informationsquelle ist heute fester Bestandteil der t{\"a}glichen Arbeit. Komplexe Suchaufgaben resultieren hierbei h{\"a}ufig in nicht trivialen, lang andauernden Suchprozessen, in deren Rahmen gro{\ss}e Datenmengen verwaltet und analysiert werden m{\"u}ssen. Ein Beispiel hierf{\"u}r ist die Suche nach Experten zu einem gegebenen Themenkomplex. Dieser Beitrag stellt das Softwareframework Supernova vor, das derartige Suchprozesse unterst{\"u}tzt. Die flexible und erweiterbare Suchplattform erlaubt es, einen Focused Crawler mit Standardsuchmaschinen zu kombinieren, stellt diverse Analysekomponenten sowie die Infrastruktur f{\"u}r deren Daten- und Informationsaustausch bereit und bildet somit die Basis f{\"u}r eine effiziente Websuche bei komplexen Fragestellungen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2007-08&engl=0}
}
@article {ART-2006-10,
   author = {Mih{\'a}ly Jakob and Fabian Kaiser and Holger Schwarz and Severin Beucker},
   title = {{Generierung von Webanwendungen f{\"u}r das Innovationsmanagement}},
   journal = {it - Information Technology},
   publisher = {Oldenbourg},
   volume = {48},
   number = {4},
   pages = {225--232},
   type = {Artikel in Zeitschrift},
   month = {August},
   year = {2006},
   language = {Deutsch},
   cr-category = {K.6.3 Software Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Der folgende Beitrag gibt einen {\"U}berblick {\"u}ber das vom Bundesministerium f{\"u}r Bildung und Forschung (BMBF) im Schwerpunktprogramm Internet{\"o}konomie gef{\"o}rderte Forschungsprojekt nova-net: Innovation in der Internet{\"o}konomie. Neben dem Forschungsrahmen und den Forschungsfeldern zur Unterst{\"u}tzung nachhaltiger Innovationsprozesse, wird insbesondere auf die Methodenentwicklung und deren informationstechnische Umsetzung im Themenfeld Trendmonitoring im Szenariomanagement eingegangen. Im Mittelpunkt steht hierbei die Erl{\"a}uterung des Szenario-Management-Frameworks SEMAFOR im Zusammenhang mit einer neu entwickelten Methode zur Entwicklung von Szenarien, sowie deren Umsetzung mittels des Webanwendungsgenerators WAGen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2006-10&engl=0}
}
@article {ART-2004-01,
   author = {Holger Schwarz},
   title = {{Konzeptueller und logischer Data-Warehouse-Entwurf: Datenmodelle und Schematypen f{\"u}r Data Mining und OLAP}},
   journal = {Informatik Forschung und Entwicklung},
   publisher = {Springer},
   volume = {18},
   number = {2},
   pages = {53--67},
   type = {Artikel in Zeitschrift},
   month = {Januar},
   year = {2004},
   language = {Deutsch},
   cr-category = {H.2.1 Database Management Logical Design,     H.2.7 Database Administration,     H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Im Data-Warehouse-Bereich gibt es sowohl auf der konzeptuellen als auch auf der logischen Ebene unterschiedliche Modellierungsans{\"a}tze, deren Entwicklung sich in der Regel an typischen Fragestellungen aus dem Bereich des Online Analytical Processing (OLAP) orientierte. Daneben spielen aber auch andere Ans{\"a}tze zur Analyse der Daten in einem Data Warehouse eine bedeutende Rolle. Ein wichtiger Vertreter ist Data Mining, mit dessen Hilfe bislang unbekannte Muster und Zusammenh{\"a}nge in Daten identifiziert werden k{\"o}nnen. Im vorliegenden Artikel wird untersucht, in wieweit sich die in der Literatur vorgeschlagenen konzeptuellen Datenmodelle f{\"u}r ein Data Warehouse eignen, das OLAP- und Data-Mining-Analysen gleicherma{\ss}en unterst{\"u}tzt. Hierzu wird das COCOM-Modell, das auf den Modellierungsm{\"o}glichkeiten verschiedener publizierter Modelle aufbaut, vorgestellt und bewertet. F{\"u}r das logische Schema eines Data Warehouse wird h{\"a}ufig ein sogenanntes Star-Schema oder ein Snowflake-Schema erstellt. F{\"u}r diese und weitere Schematypen wird analysiert, welchen Einflu{\ss} die Wahl des logischen Schemas auf Anwendungen aus den Bereichen OLAP und Data Mining hat. Wichtige Kriterien sind hier unter anderem der Informationsgehalt und die Performanz. Insgesamt zeigt dieser Beitrag, dass das COCOM-Modell und das Star-Schema eine gute Grundlage f{\"u}r den integrierten Einsatz von OLAP und Data-Mining bilden.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2004-01&engl=0}
}
@article {ART-2003-03,
   author = {Holger Schwarz and Tobias Kraft and Ralf Rantzau and Bernhard Mitschang},
   title = {{Optimierung von Anfragesequenzen in Business-Intelligence-Anwendungen}},
   journal = {it - Information Technology},
   address = {M{\"u}nchen},
   publisher = {Oldenbourg},
   volume = {45},
   number = {4},
   pages = {196--202},
   type = {Artikel in Zeitschrift},
   month = {August},
   year = {2003},
   keywords = {Data Warehouse, Business Intelligence, Anfragesequenzen, OLAP, Data Mining},
   language = {Deutsch},
   cr-category = {H.2.4 Database Management Systems,     H.2.7 Database Administration,     H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Zur Analyse der Daten in einem Data Warehouse kommen unterschiedliche Business-Intelligence-Anwendungen zum Einsatz. Ein wichtiger Erfolgsfaktor f{\"u}r deren Nutzung ist die Effizienz, mit der die erstellten Anfragen ausgef{\"u}hrt werden. In diesem Beitrag wird zun{\"a}chst das typische Verarbeitungsszenario f{\"u}r generierte Anfragesequenzen im Bereich Business Intelligence erl{\"a}utert. Darauf aufbauend wird eine Reihe anwendungsneutraler Optimierungsstrategien erl{\"a}utert und bewertet. Anhand von Messergebnissen wird gezeigt, dass es sich insbesondere bei der Restrukturierung von Anfragesequenzen um einen vielversprechenden Ansatz handelt.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2003-03&engl=0}
}
@inbook {INBOOK-2013-02,
   author = {Sylvia Radesch{\"u}tz and Holger Schwarz and Marko Vrhovnik and Bernhard Mitschang},
   title = {{A Combination Framework for Exploiting the Symbiotic Aspects of Process and Operational Data in Business Process Optimization}},
   series = {Information Reuse and Integration in Academia and Industry},
   publisher = {Springer},
   pages = {29--49},
   type = {Beitrag in Buch},
   month = {September},
   year = {2013},
   language = {Englisch},
   cr-category = {H.2 Database Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {A profound analysis of all relevant business data in a company is necessary for optimizing business processes effectively. Current analyses typically run either on business process execution data or on operational business data. Correlations among the separate data sets have to be found manually, which requires considerable effort. However, to achieve a more informative analysis and to fully optimize a company's business, an efficient consolidation of all major data sources is indispensable. Existing matching algorithms are insufficient for this task since they are restricted either to schema or to process matching. We present a new matching framework to (semi-)automatically combine process data models and operational data models for performing such a profound business analysis. We describe the algorithms and basic matching rules underlying this approach as well as an experimental study that shows the high recall and precision achieved.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2013-02&engl=0}
}
@inbook {INBOOK-2009-04,
   author = {Rodrigo Salvador Monteiro and Geraldo Zimbr{\~a}o and Jano Moreira de Souza and Holger Schwarz and Bernhard Mitschang},
   title = {{Exploring Calendar-based Pattern Mining in Data Streams}},
   series = {Complex Data Warehousing and Knowledge Discovery for Advanced Retrieval Development: Innovative Methods and Applications},
   publisher = {IGI Global},
   pages = {1--30},
   type = {Beitrag in Buch},
   month = {Juni},
   year = {2009},
   isbn = {978-1-60566-748-5},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This chapter introduces calendar-based pattern mining, which aims at identifying patterns on specific calendar partitions in continuous data streams. The authors present how a data warehouse approach can be applied to leverage calendar-based pattern mining in data streams and how the framework of the DWFIST approach can cope with tight time constraints imposed by data streams, keep storage requirements at a manageable level and, at the same time, support calendar-based frequent itemset mining. The minimum granularity of analysis, parameters of the data warehouse (e.g. mining minimum support) and parameters of the database (e.g. extent size) provide ways to tune the load performance.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2009-04&engl=0}
}
@inbook {INBOOK-2006-02,
   author = {Rodrigo Salvador Monteiro and Geraldo Zimbr{\~a}o and Holger Schwarz and Bernhard Mitschang and Jano Moreira De Souza},
   title = {{DWFIST: The Data Warehouse of Frequent Itemsets Tactics Approach}},
   series = {Processing and Managing Complex Data for Decision Support},
   publisher = {Idea Group Publishing},
   pages = {1--30},
   type = {Beitrag in Buch},
   month = {April},
   year = {2006},
   isbn = {1-59140-655-2},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration,     H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This chapter presents the core of the DWFIST approach, which is concerned with supporting the analysis and exploration of frequent itemsets and derived patterns, e.g. association rules, in transactional datasets. The goal of this new approach is to provide (1) flexible pattern-retrieval capabilities without requiring the original data during the analysis phase, and (2) a standard modeling for data warehouses of frequent itemsets allowing an easier development and reuse of tools for analysis and exploration of itemset-based patterns. Instead of storing the original datasets, our approach organizes frequent itemsets holding on different partitions of the original transactions in a data warehouse that retains sufficient information for future analysis. A running example for mining calendar-based patterns on data streams is presented. Staging area tasks are discussed and standard conceptual and logical schemas are presented. Properties of this standard modeling make it possible to retrieve frequent itemsets holding on any set of partitions along with upper and lower bounds on their frequency counts. Furthermore, precision guarantees for some interestingness measures of association rules are provided as well.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2006-02&engl=0}
}
@inbook {INBOOK-2004-08,
   author = {Peter Breitling and Holger Schwarz and Mirka Zimmermann},
   title = {{Verwaltung der Lehr- und Lerninhalte in der Metadatenbank MITO}},
   series = {Information Technology Online: Online-gest{\"u}tztes Lehren und Lernen in informationstechnischen Studieng{\"a}ngen},
   address = {M{\"u}nster, New York, M{\"u}nchen, Berlin},
   publisher = {Waxmann},
   pages = {187--199},
   type = {Beitrag in Buch},
   month = {Juni},
   year = {2004},
   isbn = {3-8309-1358-3},
   language = {Deutsch},
   cr-category = {H.0 Information Systems General},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Verwaltung der Lehr- und Lerninhalte in der Metadatenbank MITO},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2004-08&engl=0}
}
@proceedings {PROC-2017-05,
   editor = {Bernhard Mitschang and Norbert Ritter and Holger Schwarz and Meike Klettke and Andreas Thor and Oliver Kopp and Matthias Wieland},
   title = {{Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2017), 17. Fachtagung des GI-Fachbereichs ``Datenbanken und Informationssysteme'' (DBIS)}},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {LNI},
   volume = {P266},
   pages = {410},
   type = {Tagungsband},
   month = {M{\"a}rz},
   year = {2017},
   isbn = {978-3-88579-660-2},
   language = {Englisch},
   cr-category = {H.4.1 Office Automation},
   ee = {http://dblp.org/db/conf/btw/btw2017w.html},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme},
   abstract = {Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2017) -- Workshopband},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=PROC-2017-05&engl=0}
}
@proceedings {PROC-2011-01,
   editor = {Theo H{\"a}rder and Wolfgang Lehner and Bernhard Mitschang and Harald Sch{\"o}ning and Holger Schwarz},
   title = {{Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2011)}},
   publisher = {GI},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {760},
   type = {Tagungsband},
   month = {Februar},
   year = {2011},
   isbn = {978-3-88579-274-1},
   language = {Deutsch},
   cr-category = {H.2 Database Management},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme},
   abstract = {The ``BTW'' is a biennial conference series focusing on a broad range of topics addressing database management for Business, Technology, and Web. BTW 2011, its 14th event, took place in Kaiserslautern from March 2nd to 4th. This volume contains 24 long and 6 short papers selected for presentation at the conference, 9 industrial contributions, 3 papers or abstracts for the invited talks, 12 demonstration proposals, a panel description, and a paper written by the winner of the dissertation award. The subject areas include core database technology such as query optimization and indexing, DBMS-related prediction models, data streams, processing of large data sets, Web-based information extraction, benchmarking and simulation, and others.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=PROC-2011-01&engl=0}
}
@book {BOOK-2010-01,
   author = {Holger Schwarz},
   title = {{Anfragegenerierende Systeme: Anwendungsanalyse, Implementierungs- und Optimierungskonzepte}},
   address = {Wiesbaden},
   publisher = {Vieweg+Teubner},
   pages = {201},
   type = {Buch},
   month = {Juli},
   year = {2010},
   isbn = {978-3-8348-1298-8},
   language = {Deutsch},
   cr-category = {H.4 Information Systems Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme},
   abstract = {Flexible data access is indispensable in current information systems to meet increased user requirements. In a wide variety of application areas, generative approaches are therefore used to provide specific statements for data access. Holger Schwarz discusses approaches to query generation and examines implementation and optimization concepts. He illustrates the generation approaches with concrete application scenarios from areas such as business intelligence, workflow management, and search technologies. The spectrum considered ranges from individual applications to tools that are employed in application development. The second main topic is current approaches to optimizing the complex structures of generated data accesses. These are presented, and their optimization potential is demonstrated using example scenarios.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2010-01&engl=0}
}
@book {BOOK-2007-02,
   editor = {Fabian Kaiser and Sven Schimpf and Holger Schwarz and Mih{\'a}ly Jakob and Severin Beucker},
   title = {{Internetgest{\"u}tzte Expertenidentifikation zur Unterst{\"u}tzung der fr{\"u}hen Innovationsphasen}},
   publisher = {Fraunhofer IRB Verlag},
   pages = {30},
   type = {Buch},
   month = {September},
   year = {2007},
   isbn = {978-3-8167-7448-8},
   language = {Deutsch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme},
   abstract = {This brochure describes a structured approach for identifying and specifying topic areas relevant to an innovation process on and with the help of the Internet and, building on this, for finding experts in these topic areas. The second part presents the IT implementation of this approach to the Internet-based identification of experts. A use case demonstrates how this approach can support companies in planning technology fields during the early innovation phases.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2007-02&engl=0}
}
@book {BOOK-2007-01,
   editor = {Mih{\'a}ly Jakob and Dierk-Oliver Kiehne and Holger Schwarz and Fabian Kaiser and Severin Beucker},
   title = {{Delphigest{\"u}tztes Szenario-Management und -Monitoring}},
   publisher = {Fraunhofer IRB Verlag},
   pages = {28},
   type = {Buch},
   month = {September},
   year = {2007},
   isbn = {978-3-8167-7449-5},
   language = {Deutsch},
   cr-category = {H.4 Information Systems Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware;     Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme},
   abstract = {This brochure is a result of the research project nova-net: Innovation in the Internet Economy. The nova-net project develops theories, strategies, and instruments to support sustainable product and service innovations in the Internet economy. It is based on the premise that the Internet is both a source and an instrument for sustainable entrepreneurial innovation processes. This brochure presents the approach of Delphi-based scenario management and its software support by SEMAFOR.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2007-01&engl=0}
}
@book {BOOK-2006-02,
   author = {Mih{\'a}ly Jakob and Holger Schwarz and Fabian Kaiser},
   title = {{Technologie-Roadmap}},
   publisher = {Fraunhofer IRB Verlag},
   pages = {92},
   type = {Buch},
   month = {April},
   year = {2006},
   isbn = {3-8167-7047-9},
   language = {Deutsch},
   cr-category = {H.2.4 Database Management Systems,     H.2.7 Database Administration,     H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This technology roadmap identifies base, integration, and application technologies that can support the sustainable design of innovation processes. The introductory chapters present the innovation process and the fundamental topic areas addressed in the research project nova-net, as well as the so-called information workflow, which divides information processing into meaningful phases and enables the systematic solution of innovation problems. Subsequent parts of the work assign technologies relevant to innovation management to the individual information processing phases and address specific problems that deserve particular attention in the topic areas of the research project nova-net.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2006-02&engl=0}
}
@book {BOOK-2006-01,
   author = {Fabian Kaiser and Holger Schwarz and Mih{\'a}ly Jakob},
   title = {{Internetbasierte Expertensuche}},
   publisher = {Fraunhofer IRB Verlag},
   pages = {29},
   type = {Buch},
   month = {April},
   year = {2006},
   isbn = {3-8167-7042-8},
   language = {Deutsch},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {This working paper discusses methods and techniques for finding persons with expertise in freely definable topic areas. The search space focuses on the Internet, in particular the WWW and e-mail services. It is shown why conventional search engines generally cannot produce satisfactory results. Building on these weaknesses, various techniques are presented whose integration supports an expert search. The focus is on text classification systems, crawlers, and the integration of the knowledge that search engines have about the structure and contents of the World Wide Web.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2006-01&engl=0}
}