Institut für Parallele und Verteilte Systeme (IPVS)

Publikationen

Eine Übersicht der Publikationen des Instituts für Parallele und Verteilte Systeme

Publikationen AS: Bibliographie 2022 BibTeX

 
% Workshop contribution (Workshop on Data Science for Data Marketplaces, held with VLDB 2022):
% characteristics, architecture and challenges of enterprise data marketplaces.
% No publisher is recorded; the field is left out instead of carrying a placeholder
% that bibliography styles would print verbatim.
% month uses the standard three-letter macro so styles can format and localise it.
@inproceedings {INPROC-2022-08,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
   title = {{Establishing the Enterprise Data Marketplace: Characteristics, Architecture, and Challenges}},
   booktitle = {Proceedings of the Workshop on Data Science for Data Marketplaces in Conjunction with the 48th International Conference on Very Large Data Bases},
   editor = {Xiaohui Yu and Jian Pei},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--12},
   type = {Workshop-Beitrag},
   month = sep,
   year = {2022},
   language = {Englisch},
   cr-category = {E.m Data Miscellaneous,     H.3.7 Digital Libraries,     H.4.m Information Systems Applications Miscellaneous},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Companies today have increasing amounts of data at their disposal, most of which is not used, leaving the data value unexploited. In order to leverage the data value, the data must be democratized, i.e., made available to the company employees. In this context, the use of enterprise data marketplaces, platforms for trading data within a company, are proposed. However, specifics of enterprise data marketplaces and how these can be implemented have not been investigated in literature so far. To shed light on these topics, we illustrate the characteristics of an enterprise data marketplace and highlight according marketplace requirements. We provide an enterprise data marketplace architecture, discuss how it integrates into a company's system landscape and present an enterprise data marketplace prototype. Finally, we examine organizational and technical challenges which arise when operating a marketplace in the enterprise context. In this paper, we thereby present the enterprise data marketplace as a distinct marketplace type and provide the basis for establishing it within a company.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-08&engl=0}
}
% Conference paper, 12th International Conference on the Internet of Things (ACM):
% a situation-aware privacy-preserving framework for connected vehicles.
% The day range in booktitle uses "--" (en dash), the same convention as the pages field.
% month uses the standard three-letter macro so styles can format and localise it.
@inproceedings {INPROC-2022-07,
   author = {Yunxuan Li and Pascal Hirmer and Christoph Stach and Bernhard Mitschang},
   title = {{Ensuring Situation-Aware Privacy for Connected Vehicles}},
   booktitle = {Proceedings of the 12th International Conference on the Internet of Things (IoT); Delft, Netherlands, November 7--10, 2022},
   editor = {Evangelos Niforatos and Gerd Kortuem and Nirvana Meratnia and Josh Siegel and Florian Michahelles},
   address = {New York, NY, USA},
   publisher = {ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {135--138},
   type = {Konferenz-Beitrag},
   month = nov,
   year = {2022},
   isbn = {978-1-4503-9665-3},
   doi = {10.1145/3567445.3569163},
   keywords = {Connected Vehicle; Situation-Awareness; Privacy-Preserving},
   language = {Englisch},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     K.6.5 Security and Protection},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {As technology advances in new sensors and software, modern vehicles become increasingly intelligent. To date, connected vehicles can collect, process, and share data with other entities in connected vehicle environments. However, in terms of data collection and exchange, privacy becomes a central issue. It is challenging to preserve privacy in connected vehicle environments when the privacy demands of drivers could change from situation to situation even for the same service. In this paper, we analyze the requirements for a privacy-preserving system in connected vehicle environments with a focus on situation-awareness and safety aspects. Based on the analysis, we propose a novel situation-aware privacy-preserving framework for connected vehicles. Our framework supports individual privacy protections for specific end-point services and situation-aware privacy protections for different circumstances.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-07&engl=0}
}
% Conference paper, IRI 2022 (IEEE): a graph-based data structure and frequent subgraph
% mining for discovering patterns in unstructured product development processes.
% No page range is recorded for this entry.
@inproceedings{INPROC-2022-06,
  author      = {Julian Ziegler and Peter Reimann and Christoph Schulz and Florian Keller and Bernhard Mitschang},
  title       = {{A Graph Structure to Discover Patterns in Unstructured Processes of Product Development}},
  booktitle   = {Proceedings of the 23rd International Conference on Information Reuse and Integration for Data Science (IRI 2022)},
  publisher   = {IEEE},
  year        = {2022},
  month       = {August},
  type        = {Konferenz-Beitrag},
  language    = {Englisch},
  keywords    = {Process Discovery; Unstructured Processes; Process Patterns; Graph Data; Frequent Subgraph Mining},
  cr-category = {H.2.8 Database Applications},
  institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
  department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-06&engl=0},
  abstract    = {A well-known information reuse approach is to exploit event logs for process discovery and optimization. However, process discovery is rarely used for product development projects. This is because information systems in product development, e. g., Product-Lifecycle-Management (PLM) systems, do not provide the event logs required by process discovery algorithms. Additionally, existing algorithms struggle with development projects, as these are unstructured and rich in variety. In this paper, we propose a novel approach to process discovery in order to make it applicable and tailored to product development projects. Instead of using flat event logs, we provide a graph-based data structure that is able to represent both activities and data of product development projects with the dataflow between activities. Based on this structure, we can leverage provenance available in PLM systems. Furthermore, we may use frequent subgraph mining to discover process patterns. Such patterns are well suited to describe different variants and common sub-processes of unstructured processes. Using a prototype, we evaluate this approach and successfully discover prevailing patterns. These patterns may be used by engineers to support their decision-making or help improve the execution of development projects.},
}
% Conference paper (Springer): how an enterprise data marketplace supports data
% democratization in companies.
% doi holds the bare DOI (no resolver prefix); isbn is the ISBN embedded in that DOI.
% NOTE(review): series and isbn should be confirmed against the publisher record;
% address = Stuttgart is kept as exported and also looks worth verifying.
% month uses the standard three-letter macro so styles can format and localise it.
@inproceedings {INPROC-2022-05,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Shopping --- How an Enterprise Data Marketplace Supports Data Democratization in Companies}},
   booktitle = {Proceedings of the 34th International Conference on Intelligent Information Systems},
   editor = {Jochen De Weerdt and Artem Polyvyanyy},
   address = {Stuttgart},
   publisher = {Springer International Publishing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Business Information Processing},
   pages = {19--26},
   type = {Konferenz-Beitrag},
   month = may,
   year = {2022},
   isbn = {978-3-031-07481-3},
   doi = {10.1007/978-3-031-07481-3_3},
   keywords = {Data Marketplace; Data Sharing; Data Democratization},
   language = {Englisch},
   cr-category = {H.0 Information Systems General},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {To exploit the company's data value, employees must be able to find, understand and access it. The process of making corporate data available to the majority of the company's employees is referred to as data democratization. In this work, we present the current state and challenges of data democratization in companies, derived from a comprehensive literature study and expert interviews we conducted with a manufacturer. In this context a data consumer's journey is presented that reflects the required steps, tool types and roles for finding, understanding and accessing data in addition to revealing three data democratization challenges. To address these challenges we propose the use of an enterprise data marketplace, a novel type of information system for sharing data within the company. We developed a prototype based on which a suitability assessment of a data marketplace yields an improved consumer journey and demonstrates that the marketplace addresses the data democratization challenges and consequently, shows that the marketplace is suited for realizing data democratization.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-05&engl=0}
}
% Conference paper, ICEIS 2022 (SciTePress): incremental learning and windowing under
% combined dataset shifts for product failure prediction.
% No page range is recorded for this entry.
% month uses the standard three-letter macro so styles can format and localise it.
@inproceedings {INPROC-2022-03,
   author = {Marco Spie{\ss} and Peter Reimann and Christian Weber and Bernhard Mitschang},
   title = {{Analysis of Incremental Learning and Windowing to handle Combined Dataset Shifts on Binary Classification for Product Failure Prediction}},
   booktitle = {Proceedings of the 24th International Conference on Enterprise Information Systems (ICEIS 2022)},
   publisher = {SciTePress},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = apr,
   year = {2022},
   keywords = {Binary Classification; Dataset Shift; Incremental Learning; Product Failure Prediction; Windowing},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Dataset Shifts (DSS) are known to cause poor predictive performance in supervised machine learning tasks. We present a challenging binary classification task for a real-world use case of product failure prediction. The target is to predict whether a product, e. g., a truck may fail during the warranty period. However, building a satisfactory classifier is difficult, because the characteristics of underlying training data entail two kinds of DSS. First, the distribution of product configurations may change over time, leading to a covariate shift. Second, products gradually fail at different points in time, so that the labels in training data may change, which may cause a concept shift. Further, both DSS show a trade-off relationship, i. e., addressing one of them may imply negative impacts on the other one. We discuss the results of an experimental study to investigate how different approaches to addressing DSS perform when they are faced with both a covariate and a concept shift. Thereby, we prove that existing approaches, e. g., incremental learning and windowing, especially suffer from the trade-off between both DSS. Nevertheless, we come up with a solution for a data-driven classifier that yields better results than a baseline solution that does not address DSS.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-03&engl=0}
}
% Conference paper, CIRP CMS 2022 (Procedia CIRP, Elsevier): a digital twin for predicting
% single-track geometry in laser metal deposition.
% No page range is recorded for this entry.
% month uses the standard three-letter macro so styles can format and localise it.
@inproceedings {INPROC-2022-02,
   author = {Florian Hermann and Bowen Chen and Golsa Ghasemi and Valentin Stegmaier and Thomas Ackermann and Peter Reimann and Sabrina Vogt and Thomas Graf and Michael Weyrich},
   title = {{A Digital Twin Approach for the Prediction of the Geometry of Single Tracks Produced by Laser Metal Deposition}},
   booktitle = {Procedia CIRP: Proceedings of the 55th CIRP Conference on Manufacturing Systems (CIRP CMS 2022)},
   publisher = {Elsevier BV},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   type = {Konferenz-Beitrag},
   month = jun,
   year = {2022},
   keywords = {Laser metal deposition; Software-defined manufacturing; Digital Twin; Asset Administration Shell},
   language = {Englisch},
   cr-category = {H.2.8 Database Applications},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Flexible manufacturing processes such as laser metal deposition have a high potential for a production solely defined by software to cope with the current challenges of production systems. The determination of suitable machine parameters for the production of novel materials and geometries however requires extensive experimental effort. Existing simulative approaches do not offer sufficient accuracy to predict the relevant machine parameters in a satisfactory way. This paper presents a new concept, in which we apply a digital twin to provide a step towards a fully software-defined and predictable laser metal deposition process. The presented concept includes relevant data of the machines as well as data-driven machine learning models and physics-based simulation models. This enables a more reliable prediction of geometries of single tracks which was validated on a laser metal deposition machine.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-02&engl=0}
}
% Conference paper, 37th ACM/SIGAPP Symposium On Applied Computing (ACM):
% whether blockchains can comply with the GDPR when personal data are involved.
@inproceedings{INPROC-2022-01,
  author      = {Christoph Stach and Cl{\'e}mentine Gritti and Dennis Przytarski and Bernhard Mitschang},
  title       = {{Can Blockchains and Data Privacy Laws be Reconciled? A Fundamental Study of How Privacy-Aware Blockchains are Feasible}},
  booktitle   = {Proceedings of the 37th ACM/SIGAPP Symposium On Applied Computing},
  editor      = {Jiman Hong and Miroslav Bures and Ronald Petrlic and Christoph Sorge},
  publisher   = {ACM},
  address     = {Brno},
  pages       = {1--10},
  year        = {2022},
  month       = {April},
  doi         = {10.1145/3477314.3506986},
  isbn        = {978-1-4503-8713-2},
  type        = {Konferenz-Beitrag},
  language    = {Englisch},
  keywords    = {blockchains; immutable; tamper-proof; GDPR; privacy assessment},
  cr-category = {K.4.1 Computers and Society Public Policy Issues,     K.6.5 Security and Protection},
  institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
  department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-01&engl=0},
  abstract    = {Due to the advancing digitalization, the importance of data is constantly increasing. Application domains such as smart cars, smart cities, or smart healthcare rely on the permanent availability of large amounts of data to all parties involved. As a result, the value of data increases, making it a lucrative target for cyber-attacks. Particularly when human lives depend on the data, additional protection measures are therefore important for data management and provision. Blockchains, i.e., decentralized, immutable, and tamper-proof data stores, are becoming increasingly popular for this purpose. Yet, from a data protection perspective, the immutable and tamper-proof properties of blockchains pose a privacy concern. In this paper, we therefore investigate whether blockchains are in compliance with the General Data Protection Regulation (GDPR) if personal data are involved. To this end, we elaborate which articles of the GDPR are relevant in this regard and present technical solutions for those legal requirements with which blockchains are in conflict. We further identify open research questions that need to be addressed in order to achieve a privacy-by-design blockchain system.},
}
% Journal editorial, Future Internet 14(11), MDPI: introduction to the special issue on
% security and privacy in blockchains and the IoT.
@article{ART-2022-09,
  author      = {Christoph Stach},
  title       = {{Editorial to the Special Issue on Security and Privacy in Blockchains and the IoT}},
  journal     = {Future Internet},
  publisher   = {MDPI},
  volume      = {14},
  number      = {11},
  pages       = {1--4},
  year        = {2022},
  month       = {November},
  doi         = {10.3390/fi14110317},
  issn        = {1999-5903},
  ee          = {https://www.mdpi.com/1999-5903/14/11/317/htm},
  type        = {Artikel in Zeitschrift},
  language    = {Englisch},
  cr-category = {D.4.6 Operating Systems Security and Protection,     K.4.1 Computers and Society Public Policy Issues,     K.6.5 Security and Protection},
  department  = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
  contact     = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-09&engl=0},
  abstract    = {In this day and age, data have become an immensely valuable resource. They are the key driver that puts the smart into smart services. This is fundamentally fueled by two technological achievements, namely the Internet of Things (IoT), which enables continuous and comprehensive collection of all kinds of data, and blockchain technologies, which provide secure data management and exchange. In addition to those information security measures, however, data privacy solutions are also required to protect the involved sensitive data. In this Special Issue, eight research papers address security and privacy challenges when dealing with blockchain technologies and the IoT. Concerning the IoT, solutions are presented on how IoT group communication can be secured and how trust within IoT applications can be increased. In the context of blockchain technologies, approaches are introduced on how the capabilities of query processing can be enhanced and how a proof-of-work consensus protocol can be efficiently applied in IoT environments. Furthermore, it is discussed how blockchain technologies can be used in IoT environments to control access to confidential IoT data as well as to enable privacy-aware data sharing. Finally, two reviews give an overview of the state of the art in in-app activity recognition based on convolutional neural networks and the prospects for blockchain technology applications in ambient assisted living.},
}
% Journal article, ACM SIGAPP Applied Computing Review 22(3): privacy issues of blockchain
% systems assessed against the GDPR, and measures to address them.
% NOTE(review): the opening sentence of the abstract was repaired grammatically
% ("Due to the ability ..."); confirm the wording against the published abstract.
% month uses the standard three-letter macro so styles can format and localise it.
@article {ART-2022-08,
   author = {Christoph Stach and Cl{\'e}mentine Gritti and Dennis Przytarski and Bernhard Mitschang},
   title = {{Assessment and Treatment of Privacy Issues in Blockchain Systems}},
   journal = {ACM SIGAPP Applied Computing Review},
   publisher = {ACM},
   volume = {22},
   number = {3},
   pages = {5--24},
   type = {Artikel in Zeitschrift},
   month = sep,
   year = {2022},
   issn = {1559-6915},
   keywords = {blockchain; decentralized; immutable; tamper-proof; GDPR; privacy assessment; data purging; data authentication; permission control; privacy filters; privacy control environment},
   language = {Englisch},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     K.6.5 Security and Protection},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Due to the ability to capture and quantify any aspect of daily life via sensors, enabled by the Internet of Things (IoT), data have become one of the most important resources of the 21st century. However, the high value of data also renders data an appealing target for criminals. Two key protection goals when dealing with data are therefore to maintain their permanent availability and to ensure their integrity. Blockchain technology provides a means of data protection that addresses both of these objectives. On that account, blockchains are becoming increasingly popular for the management of critical data. As blockchains are operated in a decentralized manner, they are not only protected against failures, but it is also ensured that neither party has sole control over the managed data. Furthermore, blockchains are immutable and tamper-proof data stores, whereby data integrity is guaranteed. While these properties are preferable from a data security perspective, they also pose a threat to privacy and confidentiality, as data cannot be concealed, rectified, or deleted once they are added to the blockchain. In this paper, we therefore investigate which features of the blockchain pose an inherent privacy threat when dealing with personal or confidential data. To this end, we consider to what extent blockchains are in compliance with applicable data protection laws, namely the European General Data Protection Regulation (GDPR). Based on our identified key issues, we assess which concepts and technical measures can be leveraged to address these issues in order to create a privacy-by-design blockchain system.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-08&engl=0}
}
% Journal article, Future Internet 14(11), MDPI: survey of state-of-the-art privacy
% mechanisms for smart services and open questions about their practicality.
% month uses the standard three-letter macro so styles can format and localise it.
@article {ART-2022-07,
   author = {Christoph Stach and Cl{\'e}mentine Gritti and Julia Br{\"a}cker and Michael Behringer and Bernhard Mitschang},
   title = {{Protecting Sensitive Data in the Information Age: State of the Art and Future Prospects}},
   journal = {Future Internet},
   publisher = {MDPI},
   volume = {14},
   number = {11},
   pages = {1--42},
   type = {Artikel in Zeitschrift},
   month = oct,
   year = {2022},
   issn = {1999-5903},
   doi = {10.3390/fi14110302},
   keywords = {smart service; privacy techniques; location-based services; health services; voice-controlled digital assistants; image analysis; food analysis; recommender systems; DNA sequence classification},
   language = {Englisch},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     K.6.5 Security and Protection},
   ee = {https://www.mdpi.com/1999-5903/14/11/302/htm},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {The present information age is characterized by an ever-increasing digitalization. Smart devices quantify our entire lives. These collected data provide the foundation for data-driven services called smart services. They are able to adapt to a given context and thus tailor their functionalities to the user's needs. It is therefore not surprising that their main resource, namely data, is nowadays a valuable commodity that can also be traded. However, this trend does not only have positive sides, as the gathered data reveal a lot of information about various data subjects. To prevent uncontrolled insights into private or confidential matters, data protection laws restrict the processing of sensitive data. One key factor in this regard is user-friendly privacy mechanisms. In this paper, we therefore assess current state-of-the-art privacy mechanisms. To this end, we initially identify forms of data processing applied by smart services. We then discuss privacy mechanisms suited for these use cases. Our findings reveal that current state-of-the-art privacy mechanisms provide good protection in principle, but there is no compelling one-size-fits-all privacy approach. This leads to further questions regarding the practicality of these mechanisms, which we present in the form of seven thought-provoking propositions.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-07&engl=0}
}
% Journal article, Journal of Cybersecurity and Privacy 2(3), MDPI: SMARTEN, a sample-based
% approach to privacy-friendly data refinement.
% The dash in the title is written as "---" so the entry stays ASCII, in line with the
% LaTeX-escaped accents used in the name fields.
% month uses the standard three-letter macro so styles can format and localise it.
@article {ART-2022-06,
   author = {Christoph Stach and Michael Behringer and Julia Br{\"a}cker and Cl{\'e}mentine Gritti and Bernhard Mitschang},
   title = {{SMARTEN --- A Sample-Based Approach towards Privacy-Friendly Data Refinement}},
   journal = {Journal of Cybersecurity and Privacy},
   publisher = {MDPI},
   volume = {2},
   number = {3},
   pages = {606--628},
   type = {Artikel in Zeitschrift},
   month = aug,
   year = {2022},
   issn = {2624-800X},
   doi = {10.3390/jcp2030031},
   keywords = {privacy; data refinement; data cleansing; data transformation; human-in-the-loop},
   language = {Englisch},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,     K.6.5 Security and Protection},
   ee = {https://www.mdpi.com/2624-800X/2/3/31/htm},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Two factors are crucial for the effective operation of modern-day smart services: Initially, IoT-enabled technologies have to capture and combine huge amounts of data on data subjects. Then, all these data have to be processed exhaustively by means of techniques from the area of big data analytics. With regard to the latter, thorough data refinement in terms of data cleansing and data transformation is the decisive cornerstone. Studies show that data refinement reaches its full potential only by involving domain experts in the process. However, this means that these experts need full insight into the data in order to be able to identify and resolve any issues therein, e.g., by correcting or removing inaccurate, incorrect, or irrelevant data records. In particular for sensitive data (e.g., private data or confidential data), this poses a problem, since these data are thereby disclosed to third parties such as domain experts. To this end, we introduce SMARTEN, a sample-based approach towards privacy-friendly data refinement to smarten up big data analytics and smart services. SMARTEN applies a revised data refinement process that fully involves domain experts in data pre-processing but does not expose any sensitive data to them or any other third-party. To achieve this, domain experts obtain a representative sample of the entire data set that meets all privacy policies and confidentiality guidelines. Based on this sample, domain experts define data cleaning and transformation steps. Subsequently, these steps are converted into executable data refinement rules and applied to the entire data set. Domain experts can request further samples and define further rules until the data quality required for the intended use case is reached. Evaluation results confirm that our approach is effective in terms of both data quality and data privacy.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-06&engl=0}
}
% Journal article, Journal of Data Intelligence 3(3), Rinton Press: BARENTS, a tailorable
% data preparation zone for Data Lakes; extended version of an earlier conference paper.
% The abstract is kept on a single physical line like every other field value in this file.
% month uses the standard three-letter macro so styles can format and localise it.
@article {ART-2022-01,
   author = {Christoph Stach and Julia Br{\"a}cker and Rebecca Eichler and Corinna Giebler and Bernhard Mitschang},
   title = {{Simplified Specification of Data Requirements for Demand-Actuated Big Data Refinement}},
   journal = {Journal of Data Intelligence},
   publisher = {Rinton Press},
   volume = {3},
   number = {3},
   pages = {366--400},
   type = {Artikel in Zeitschrift},
   month = aug,
   year = {2022},
   issn = {2577-610X},
   keywords = {data pre-processing; data transformation; knowledge modeling; ontology; data management; Data Lakes; zone model; food analysis},
   language = {Englisch},
   cr-category = {H.2.7 Database Administration,     E.2 Data Storage Representations,     H.3.3 Information Search and Retrieval,     H.2.8 Database Applications},
   contact = {Senden Sie eine E-Mail an christoph.stach@ipvs.uni-stuttgart.de.},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Anwendersoftware},
   abstract = {Data have become one of the most valuable resources in modern society. Due to increasing digitalization and the growing prevalence of the Internet of Things, it is possible to capture data on any aspect of today's life. Similar to physical resources, data have to be refined before they can become a profitable asset. However, such data preparation entails completely novel challenges: For instance, data are not consumed when being processed, whereby the volume of available data that needs to be managed increases steadily. Furthermore, the data preparation has to be tailored to the intended use case in order to achieve an optimal outcome. This, however, requires the knowledge of domain experts. Since such experts are typically not IT experts, they need tools that enable them to specify the data requirements of their use cases in a user-friendly manner. The goal of this data preparation is to provide any emerging use case with demand-actuated data. With this in mind, we designed a tailorable data preparation zone for Data Lakes called BARENTS. It provides a simplified method for domain experts to specify how data must be pre-processed for their use cases, and these data preparation steps are then applied automatically. The data requirements are specified by means of an ontology-based method which is comprehensible to non-IT experts. Data preparation and provisioning are realized resource-efficient by implementing BARENTS as a dedicated zone for Data Lakes. This way, BARENTS is seamlessly embeddable into established Big Data infrastructures. This article is an extended and revised version of the conference paper ``Demand-Driven Data Provisioning in Data Lakes: BARENTS - A Tailorable Data Preparation Zone'' by Stach et al. In comparison to our original conference paper, we take a more detailed look at related work in the paper at hand. The emphasis of this extended and revised version, however, is on strategies to improve the performance of BARENTS and enhance its functionality. To this end, we discuss in-depth implementation details of our prototype and introduce a novel recommender system in BARENTS that assists users in specifying data preparation steps.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-01&engl=0}
}
 
Zum Seitenanfang