Next revision
|
Previous revision
Next revision
Both sides next revision
|
melange:papers:fall2019 [2019/08/30 13:56] jana created |
melange:papers:fall2019 [2019/11/04 17:19] jana |
@article{Verdoolaege:2012:ECS:2362389.2362390, author = {Verdoolaege, Sven and Janssens, Gerda and Bruynooghe, Maurice}, title = {Equivalence Checking of Static Affine Programs Using Widening to Handle Recurrences}, journal = {ACM Trans. Program. Lang. Syst.}, issue_date = {October 2012}, volume = {34}, number = {3}, month = nov, year = {2012}, issn = {0164-0925}, pages = {11:1--11:35}, articleno = {11}, numpages = {35}, url = {http://doi.acm.org/10.1145/2362389.2362390}, doi = {10.1145/2362389.2362390}, acmid = {2362390}, publisher = {ACM}, address = {New York, NY, USA}, keywords = {Commutativity, equivalence checking, polytope model, recurrences, widening}, } | @inproceedings{Rawat:2016:ERM:2884045.2884047, |
| author = {Rawat, Prashant Singh and Hong, Changwan and Ravishankar, Mahesh and Grover, Vinod and Pouchet, Louis-No{\"e}l and Sadayappan, P.}, |
| title = {Effective Resource Management for Enhancing Performance of {2D} and {3D} Stencils on {GPUs}}, |
| booktitle = {Proceedings of the 9th Annual Workshop on General Purpose Processing Using Graphics Processing Unit}, |
| series = {GPGPU '16}, |
| year = {2016}, |
| isbn = {978-1-4503-4195-0}, |
| location = {Barcelona, Spain}, |
| pages = {92--102}, |
| numpages = {11}, |
| url = {http://doi.acm.org/10.1145/2884045.2884047}, |
| doi = {10.1145/2884045.2884047}, |
| acmid = {2884047}, |
| publisher = {ACM}, |
| address = {New York, NY, USA}, |
| keywords = {GPGPU, resource management, stencil computations, tiling}, |
| } |
| |
| |
| |
| |
| @article{DBLP:journals/corr/abs-1805-02566, |
| author = {Kwon, Hyoukjun and Pellauer, Michael and Krishna, Tushar}, |
| title = {Understanding Reuse, Performance, and Hardware Cost of {DNN} Dataflows: A Data-Centric Approach}, |
| journal = {CoRR}, |
| volume = {abs/1805.02566}, |
| year = {2018}, |
| url = {http://arxiv.org/abs/1805.02566}, |
| archivePrefix = {arXiv}, |
| eprint = {1805.02566}, |
| timestamp = {Mon, 13 Aug 2018 16:46:45 +0200}, |
| biburl = {https://dblp.org/rec/bib/journals/corr/abs-1805-02566}, |
| bibsource = {dblp computer science bibliography, https://dblp.org} |
| } |
| |
| |
| |
| |
| @inproceedings{Stock:2014:FED:2594291.2594342, |
| author = {Stock, Kevin and Kong, Martin and Grosser, Tobias and Pouchet, Louis-No{\"e}l and Rastello, Fabrice and Ramanujam, J. and Sadayappan, P.}, |
| title = {A Framework for Enhancing Data Reuse via Associative Reordering}, |
| booktitle = {Proceedings of the 35th ACM SIGPLAN Conference on Programming Language Design and Implementation}, |
| series = {PLDI '14}, |
| year = {2014}, |
| isbn = {978-1-4503-2784-8}, |
| location = {Edinburgh, United Kingdom}, |
| pages = {65--76}, |
| numpages = {12}, |
| url = {http://doi.acm.org/10.1145/2594291.2594342}, |
| doi = {10.1145/2594291.2594342}, |
| acmid = {2594342}, |
| publisher = {ACM}, |
| address = {New York, NY, USA}, |
| } |
| |
| @article{7738524, |
| author = {Chen, Y. H. and Krishna, T. and Emer, J. S. and Sze, V.}, |
| title = {Eyeriss: An Energy-Efficient Reconfigurable Accelerator for Deep Convolutional Neural Networks}, |
| journal = {IEEE Journal of Solid-State Circuits}, |
| volume = {52}, |
| number = {1}, |
| month = jan, |
| year = {2017}, |
| issn = {0018-9200}, |
| pages = {127--138}, |
| url = {http://ieeexplore.ieee.org/document/7738524/}, |
| doi = {10.1109/JSSC.2016.2616357}, |
| } |
| |
| @article{Vasilache:2019:NAL:3366460.3355606, |
| author = {Vasilache, Nicolas and Zinenko, Oleksandr and Theodoridis, Theodoros and Goyal, Priya and Devito, Zachary and Moses, William S. and Verdoolaege, Sven and Adams, Andrew and Cohen, Albert}, |
| title = {The Next 700 Accelerated Layers: From Mathematical Expressions of Network Computation Graphs to Accelerated GPU Kernels, Automatically}, |
| journal = {ACM Trans. Archit. Code Optim.}, |
| issue_date = {October 2019}, |
| volume = {16}, |
| number = {4}, |
| month = oct, |
| year = {2019}, |
| issn = {1544-3566}, |
| pages = {38:1--38:26}, |
| articleno = {38}, |
| numpages = {26}, |
| url = {http://doi.acm.org/10.1145/3355606}, |
| doi = {10.1145/3355606}, |
| acmid = {3355606}, |
| publisher = {ACM}, |
| address = {New York, NY, USA}, |
| keywords = {Deep learning layers, GPU acceleration, polyhedral compilation}, |