% INPROC-2018-08: Octo-Tiger node-level performance on Intel Knights Landing
% (paper in the Proceedings of the 6th International Workshop on OpenCL, 2018).
% Non-standard fields (type, language, cr-category, contact, department,
% institution) are ignored by standard styles and keep the database's German
% vocabulary.
@inproceedings {INPROC-2018-08,
   author = {Pfander, David and Dai{\ss}, Gregor and Pfl{\"u}ger, Dirk and Marcello, Dominic and Kaiser, Hartmut},
   title = {Accelerating {Octo-Tiger}: Stellar Mergers on {Intel Knights Landing} with {HPX}},
   booktitle = {Proceedings of the 6th International Workshop on {OpenCL}},
   publisher = {ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--9},
   type = {Konferenz-Beitrag},
   month = may,
   year = {2018},
   language = {Englisch},
   cr-category = {D.1 Programming Techniques, D.3.4 Programming Languages Processors, G.4 Mathematical Software},
   contact = {submitted},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Simulation gro{\ss}er Systeme},
   abstract = {The optimization of performance of complex simulation codes with high computational demands, such as Octo-Tiger, is an ongoing challenge. Octo-Tiger is an astrophysics code simulating the evolution of star systems based on the fast multipole method on adaptive octrees. It was implemented using high-level C++ libraries, specifically HPX and Vc, which allows its use on different hardware platforms. Recently, we have demonstrated excellent scalability in a distributed setting. In this paper, we study Octo-Tiger's node-level performance on an Intel Knights Landing platform. We focus on the fast multipole method, as it is Octo-Tiger's computationally most demanding component. By using HPX and a futurization approach, we can efficiently traverse the adaptive octrees in parallel. On the core-level, threads process sub-grids using multiple 743-element stencils. In numerical experiments, simulating the time evolution of a rotating star on an Intel Xeon Phi 7250 Knights Landing processor, Octo-Tiger shows good parallel efficiency and achieves up to 408 GFLOPS. This results in a speedup of 2x compared to a 24-core Skylake-SP platform, using the same high-level abstractions.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2018-08&engl=0}
}
% INPROC-2018-07: AutoTuneTMP, auto-tuning in C++ via runtime template
% metaprogramming (IPDPSW 2018 workshop paper). No abstract is recorded for
% this entry. Non-standard fields (type, language, cr-category, contact,
% department, institution) are ignored by standard styles.
@inproceedings {INPROC-2018-07,
   author = {Pfander, David and Brunn, Malte and Pfl{\"u}ger, Dirk},
   title = {{AutoTuneTMP}: Auto-Tuning in {C++} With Runtime Template Metaprogramming},
   booktitle = {2018 {IEEE} International Parallel and Distributed Processing Symposium Workshops ({IPDPSW})},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {1--10},
   type = {Konferenz-Beitrag},
   month = may,
   year = {2018},
   keywords = {auto-tuning; template metaprogramming; just-in-time compilation; performance engineering},
   language = {Englisch},
   cr-category = {D.3.4 Programming Languages Processors},
   contact = {submitted},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Simulation gro{\ss}er Systeme},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2018-07&engl=0}
}
% INPROC-2016-59: experiences with the sparse grid code SG++ (SE-HPCCSE 2016
% workshop paper, pages 17--25). No abstract is recorded for this entry.
% NOTE(review): `language` is set to Englisch because the title and venue are
% English -- confirm against the published paper.
% `{Experiences}` is braced so sentence-casing styles keep the capital after
% the full stop inside the title.
@inproceedings {INPROC-2016-59,
   author = {Pfl{\"u}ger, Dirk and Pfander, David},
   title = {Computational Efficiency vs. Maintainability and Portability. {Experiences} with the Sparse Grid Code {SG++}},
   booktitle = {2016 Fourth International Workshop on Software Engineering for High Performance Computing in Computational Science and Engineering ({SE-HPCCSE})},
   address = {Salt Lake City, UT, USA},
   publisher = {IEEE},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {17--25},
   type = {Konferenz-Beitrag},
   month = nov,
   year = {2016},
   isbn = {978-1-5090-5224-0},
   keywords = {software maintenance; software quality; computational efficiency; computational maintainability; computational portability; design decisions; sparse grid code SG++; Computational modeling; Hardware; Programming; Software engineering; Usability},
   language = {Englisch},
   cr-category = {G.1.0 Numerical Analysis General, D.2.3 Software Engineering Coding Tools and Techniques, D.2.11 Software Engineering Software Architectures, D.2.13 Software Engineering Reusable Software},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Simulation gro{\ss}er Systeme},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2016-59&engl=0}
}
% INPROC-2016-56: subspace-based algorithm for spatially adaptive sparse grid
% regression/classification; contribution to "Sparse Grids and Applications --
% Stuttgart 2014" (Lecture Notes in Computational Science and Engineering,
% vol. 109). No abstract is recorded for this entry.
% NOTE(review): `language` is set to Englisch because the title and volume are
% English -- confirm against the published chapter.
@inproceedings {INPROC-2016-56,
   author = {Pfander, David and Heinecke, Alexander and Pfl{\"u}ger, Dirk},
   title = {A New Subspace-Based Algorithm for Efficient Spatially Adaptive Sparse Grid Regression, Classification and Multi-evaluation},
   booktitle = {Sparse Grids and Applications -- {Stuttgart} 2014},
   editor = {Garcke, Jochen and Pfl{\"u}ger, Dirk},
   publisher = {Springer International Publishing},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   series = {Lecture Notes in Computational Science and Engineering},
   volume = {109},
   pages = {221--246},
   type = {Konferenz-Beitrag},
   month = jan,
   year = {2016},
   keywords = {Sparse Grids; Performance Optimization; Adaptivity; High-Performance Computing},
   language = {Englisch},
   cr-category = {G.4 Mathematical Software, D.1.3 Concurrent Programming},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Simulation gro{\ss}er Systeme},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2016-56&engl=0}
}
% INPROC-2016-53: scalability-efficiency vs. maintainability-portability
% trade-off in simulation software engineering (SE-HPCCSE 2016 workshop paper,
% pages 26--34). No abstract is recorded for this entry.
% `doi` holds the bare DOI; `ee` is the database's own resolver-URL field for
% the same DOI.
@inproceedings {INPROC-2016-53,
   author = {Pfl{\"u}ger, Dirk and Mehl, Miriam and Valentin, Julian and Lindner, Florian and Pfander, David and Wagner, Stefan and Graziotin, Daniel and Wang, Yang},
   title = {The Scalability-Efficiency/Maintainability-Portability Trade-Off in Simulation Software Engineering: Examples and a Preliminary Systematic Literature Review},
   booktitle = {Proceedings of 2016 Fourth International Workshop on Software Engineering for High Performance Computing in Computational Science and Engineering ({SE-HPCCSE} 2016), held in conjunction with {SC16}, {Salt Lake City, Utah}},
   publisher = {IEEE Computer Society; ACM},
   institution = {Universit{\"a}t Stuttgart, Fakult{\"a}t Informatik, Elektrotechnik und Informationstechnik, Germany},
   pages = {26--34},
   type = {Workshop-Beitrag},
   month = nov,
   year = {2016},
   doi = {10.1109/SE-HPCCSE.2016.008},
   keywords = {digital simulation; software maintenance; software portability; SLR; SSE; complex software; dynamic construction process; maintainability-portability trade-off; scalability-efficiency trade-off; simulation software engineering; systematic literature review; Computational modeling; Hardware; Mathematical model; Numerical models; Scalability; Software; Software engineering},
   language = {Englisch},
   cr-category = {D.2.0 Software Engineering General},
   ee = {https://dx.doi.org/10.1109/SE-HPCCSE.2016.008},
   department = {Universit{\"a}t Stuttgart, Institut f{\"u}r Parallele und Verteilte Systeme, Simulation gro{\ss}er Systeme},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2016-53&engl=0}
}