User Tools

Site Tools


melange:papers:fall2021

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
melange:papers:fall2021 [2021/09/23 11:23]
corentin
melange:papers:fall2021 [2021/10/27 13:55]
corentin
Line 65: Line 65:
 @inproceedings{Henry_2021, @inproceedings{Henry_2021,
   title         = {Compilation of Sparse Array Programming Models},   title         = {Compilation of Sparse Array Programming Models},
-  author        = {Rawn Henry, Olivia Hsu, Rohan Yadav, Stephen Chou, Kunle Olukotun, Saman Amarasinghe, and Fredrik +  author        = {Rawn Henry, Olivia Hsu, Rohan Yadav, Stephen Chou, Kunle Olukotun, Saman Amarasinghe, and Fredrik Kjolstad},
-Kjolstad},+
   year          = {2021},   year          = {2021},
   articleno     = {128},   articleno     = {128},
Line 93: Line 92:
   url          = {https://link.springer.com/chapter/10.1007/3-540-17179-7_30}   url          = {https://link.springer.com/chapter/10.1007/3-540-17179-7_30}
 } }
 +
 +@INPROCEEDINGS{145447,
 +  author       = {Mauras, C. and Quinton, P. and Rajopadhye, S. and Saouter, Y.},
 +  booktitle    = {[1990] Proceedings of the International Conference on Application Specific Array Processors}, 
 +  title        = {Scheduling affine parameterized recurrences by means of Variable Dependent Timing Functions}, 
 +  year         = {1990},
 +  volume       = {},
 +  number       = {},
 +  pages        = {100-110},
 +  abstract     = {The authors present new scheduling techniques for systems of affine recurrence equations. They show that it is possible to extend earlier results on affine scheduling to the case when each variable of the system is scheduled independently of the others by an affine timing-function. This new technique makes it possible to analyze systems of recurrence equations with variables in different index spaces, and multi-step systolic algorithms. This theory applies directly to many problems, such as dynamic programming, LU decomposition, and 2-D convolution, and it avoids in particular preliminary heuristic rewriting of the equations.},
 +  keywords     = {},
 +  doi          = {10.1109/ASAP.1990.145447},
 +  ISSN         = {},
 +  month        = {Sep.},
 +  loc          = {[1990] Proceedings of the International Conference on Application Specific Array Processors},
 +  url          = {https://ieeexplore.ieee.org/document/145447?arnumber=145447}
 +}
 +
 +@InProceedings{9229617,
 +  author       = {Mahdi Javanmard, Mohammad and Ahmad, Zafar and Zola, Jaroslaw and Pouchet, Louis-Noël and Chowdhury, Rezaul and Harrison, Robert},
 +  booktitle    = {2020 IEEE International Conference on Cluster Computing (CLUSTER)}, 
 +  title        = {Efficient Execution of Dynamic Programming Algorithms on Apache Spark}, 
 +  year         = {2020},
 +  volume       = {},
 +  number       = {},
 +  pages        = {337-348},
 +  doi          = {10.1109/CLUSTER49012.2020.00044},
 +  loc          = {[2020] IEEE International Conference on Cluster Computing (CLUSTER)},
 +  url          = {https://par.nsf.gov/servlets/purl/10224953}
 +}
 +
 +@inproceedings{10.1145/2684746.2689065,
 +  author       = {Li, Peng and Zhang, Peng and Pouchet, Louis-Noel and Cong, Jason},
 +  title        = {Resource-Aware Throughput Optimization for High-Level Synthesis},
 +  year         = {2015},
 +  isbn         = {9781450333153},
 +  publisher    = {Association for Computing Machinery},
 +  address      = {New York, NY, USA},
 +  url          = {https://doi.org/10.1145/2684746.2689065},
 +  doi          = {10.1145/2684746.2689065},
 +  abstract     = {With the emergence of robust high-level synthesis tools to automatically transform codes written in high-level languages into RTL implementations, the programming productivity when synthesising accelerators improves significantly. However, although the state-of-the-art high-level synthesis tools can offer high-quality designs for simple nested loop kernels, there is still a significant performance gap between the synthesized and the optimal design for real world complex applications with multiple loops.In this work we first demonstrate that maximizing the throughput of each individual loop is not always the most efficient approach to achieving the maximum system-level throughput. More area efficient non-fully pipelined design variants may outperform the fully-pipelined version by enabling larger degrees of parallelism. We develop an algorithm to determine the optimal resource usage and initiation intervals for each loop in the applications to achieve maximum throughput within a given area budget. We report experimental results on eight applications, showing an average of 31% performance speedup over state-of-the-art HLS solutions.},
 +  booktitle    = {Proceedings of the 2015 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
 +  pages        = {200–209},
 +  numpages     = {10},
 +  keywords     = {resource sharing, area constraint, throughput optimization, high-level synthesis},
 +  location     = {Monterey, California, USA},
 +  series       = {FPGA '15},
 +  loc          = {Proceedings of the 2015 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
 +  number       = {}
 +}
 +
melange/papers/fall2021.txt · Last modified: 2021/10/27 13:55 by corentin