This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision Next revision Both sides next revision | ||
melange:papers:fall2021 [2021/09/13 20:09] corentin |
melange:papers:fall2021 [2021/10/05 13:05] corentin |
||
---|---|---|---|
Line 23: | Line 23: | ||
address | address | ||
url = {https:// | url = {https:// | ||
- | abstract | + | abstract |
- | maximum, plays a crucial role in parallel programming. This paper presents a new approach, | + | |
- | reverse engineering, | + | |
- | sequential programs. The body of the sequential reduction loop is regarded as a black | + | |
- | box, and its input-output behaviors are sampled. If the behaviors correspond to a | + | |
- | set of linear polynomials over a semiring, a divide-and-conquer parallel reduction | + | |
- | is generated. Auxiliary reverse-engineering methods enable a long and nested loop | + | |
- | body to be decomposed, which makes our parallelization scheme applicable to various | + | |
- | types of reduction loops. This approach is not only simple and efficient but also | + | |
- | agnostic to the details of the input program. Its potential is demonstrated through | + | |
- | several use case scenarios. A proof-of-concept implementation successfully inferred | + | |
- | linear polynomials for nearly all of the 74 benchmarks exhaustively collected from | + | |
- | the literature. These characteristics and experimental results demonstrate the promise | + | |
- | of the proposed approach, despite its inherent unsoundness.}, | + | |
booktitle = {Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation}, | booktitle = {Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation}, | ||
+ | loc = {Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation}, | ||
+ | number | ||
pages = {820–834}, | pages = {820–834}, | ||
numpages | numpages | ||
Line 51: | Line 40: | ||
url = {https:// | url = {https:// | ||
doi = {10.1145/ | doi = {10.1145/ | ||
- | abstract | + | abstract |
- | as scans or reductions. Many efforts over the past 2+ decades have focused on parallelizing | + | |
- | such loops by extracting and exploiting the hidden scan/ | + | |
- | approaches have largely been based on a heuristic search for closed-form composition | + | |
- | of computations across loop iterations.While the search-based approaches are successful | + | |
- | in parallelizing many recurrences, | + | |
- | program analysis. In this work, we propose a novel approach called sampling-and-reconstruction, | + | |
- | which avoids the search for closed-form composition and has the potential to cover | + | |
- | more recurrence loops. It is based on an observation that many recurrences can have | + | |
- | a point-value representation. The loop iterations are divided across processors, and | + | |
- | where the initial value(s) of the recurrence variable(s) are unknown, we execute with | + | |
- | several chosen (sampling) initial values. Then, correct final result can be obtained | + | |
- | by reconstructing the function from the outputs produced on the chosen initial values. | + | |
- | Our approach is effective in parallelizing linear, rectified-linear, | + | |
- | and multivariate recurrences, | + | |
- | Our evaluation shows that our approach can parallelize a diverse set of sequential | + | |
- | loops, including cases that cannot be parallelized by a state-of-the-art static parallelization | + | |
- | tool, and achieves linear scalability across multiple cores.}, | + | |
booktitle = {Proceedings of the 27th International Conference on Parallel Architectures and Compilation Techniques}, | booktitle = {Proceedings of the 27th International Conference on Parallel Architectures and Compilation Techniques}, | ||
+ | loc = {Proceedings of the 27th International Conference on Parallel Architectures and Compilation Techniques}, | ||
+ | number | ||
articleno = {10}, | articleno = {10}, | ||
numpages | numpages | ||
Line 75: | Line 49: | ||
location | location | ||
series | series | ||
+ | } | ||
+ | |||
+ | @misc{blleloch2019improved, | ||
+ | title = {Improved Parallel Cache-Oblivious Algorithms for Dynamic Programming and Linear Algebra}, | ||
+ | author | ||
+ | year = {2019}, | ||
+ | eprint | ||
+ | archivePrefix = {arXiv}, | ||
+ | primaryClass | ||
+ | loc = {arXiv}, | ||
+ | number | ||
+ | url = {https:// | ||
+ | } | ||
+ | |||
+ | @inproceedings{Henry_2021, | ||
+ | title = {Compilation of Sparse Array Programming Models}, | ||
+ | author | ||
+ | year = {2021}, | ||
+ | articleno | ||
+ | numpages | ||
+ | url = {http:// | ||
+ | publisher | ||
+ | loc = {Proc. ACM Program. Lang. 5}, | ||
+ | number | ||
+ | doi = {10.1145/ | ||
+ | } | ||
+ | |||
+ | @InProceedings{10.1007/ | ||
+ | author | ||
+ | editor | ||
+ | title = {On synthesizing systolic arrays from Recurrence Equations with Linear Dependencies}, | ||
+ | booktitle | ||
+ | year = {1986}, | ||
+ | publisher | ||
+ | address | ||
+ | pages = {488--503}, | ||
+ | abstract | ||
+ | isbn = {978-3-540-47239-1}, | ||
+ | loc = {Foundations of Software Technology and Theoretical Computer Science}, | ||
+ | number | ||
+ | doi = {10.1007/ | ||
+ | url = {https:// | ||
+ | } | ||
+ | |||
+ | @INPROCEEDINGS{145447, | ||
+ | author | ||
+ | booktitle | ||
+ | title = {Scheduling affine parameterized recurrences by means of Variable Dependent Timing Functions}, | ||
+ | year = {1990}, | ||
+ | volume | ||
+ | number | ||
+ | pages = {100-110}, | ||
+ | abstract | ||
+ | keywords | ||
+ | doi = {10.1109/ | ||
+ | ISSN = {}, | ||
+ | month = {Sep.}, | ||
+ | loc = {[1990] Proceedings of the International Conference on Application Specific Array Processors}, | ||
+ | url = {https:// | ||
+ | } | ||
+ | |||
+ | @InProceedings{9229617, | ||
+ | author | ||
+ | booktitle | ||
+ | title = {Efficient Execution of Dynamic Programming Algorithms on Apache Spark}, | ||
+ | year = {2020}, | ||
+ | volume | ||
+ | number | ||
+ | pages = {337-348}, | ||
+ | doi = {10.1109/ | ||
+ | loc = {[2020] IEEE International Conference on Cluster Computing (CLUSTER)}, | ||
+ | url = {https:// | ||
} | } | ||