% NOTE(review): this citation key is declared a second time later in this
% file (that copy has issue_date "October 2019" instead of "November 2019").
% BibTeX reports a repeated key as an error; keep a single copy once the
% correct issue date has been confirmed against the publisher record.
% The acronym in the title is braced so sentence-casing styles keep its case.
@article{Vasilache:2019:NAL:3366460.3355606,
  author     = {Vasilache, Nicolas and Zinenko, Oleksandr and Theodoridis, Theodoros and Goyal, Priya and DeVito, Zachary and Moses, William S. and Verdoolaege, Sven and Adams, Andrew and Cohen, Albert},
  title      = {The Next 700 Accelerated Layers: From Mathematical Expressions of Network Computation Graphs to Accelerated {GPU} Kernels, Automatically},
  journal    = {ACM Transactions on Architecture and Code Optimization},
  issue_date = {November 2019},
  volume     = {16},
  number     = {4},
  month      = oct,
  year       = {2019},
  issn       = {1544-3566},
  pages      = {38:1--38:26},
  articleno  = {38},
  numpages   = {26},
  url        = {https://doi.org/10.1145/3355606},
  doi        = {10.1145/3355606},
  acmid      = {3355606},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  keywords   = {Deep learning layers, GPU acceleration, polyhedral compilation},
}
% Accented letters in author names are written as brace-wrapped LaTeX accent
% commands so classic 8-bit BibTeX sorts and labels them correctly.
@inproceedings{Augustine:2019:GPC:3314221.3314615,
  author    = {Augustine, Travis and Sarma, Janarthanan and Pouchet, Louis-No{\"e}l and Rodr{\'\i}guez, Gabriel},
  title     = {Generating Piecewise-regular Code from Irregular Structures},
  booktitle = {Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation},
  series    = {PLDI 2019},
  year      = {2019},
  isbn      = {978-1-4503-6712-7},
  location  = {Phoenix, AZ, USA},
  pages     = {625--639},
  numpages  = {15},
  url       = {https://doi.org/10.1145/3314221.3314615},
  doi       = {10.1145/3314221.3314615},
  acmid     = {3314615},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {Polyhedral compilation, SpMV, sparse data structure, trace compression},
}
% Case-sensitive title tokens (2D, 3D, GPUs) are braced so sentence-casing
% styles do not lowercase them; the accented author name uses a brace-wrapped
% LaTeX accent command for classic 8-bit BibTeX.
@inproceedings{Rawat:2016:ERM:2884045.2884047,
  author    = {Rawat, Prashant Singh and Hong, Changwan and Ravishankar, Mahesh and Grover, Vinod and Pouchet, Louis-No{\"e}l and Sadayappan, P.},
  title     = {Effective Resource Management for Enhancing Performance of {2D} and {3D} Stencils on {GPUs}},
  booktitle = {Proceedings of the 9th Annual Workshop on General Purpose Processing Using Graphics Processing Unit},
  series    = {GPGPU '16},
  year      = {2016},
  isbn      = {978-1-4503-4195-0},
  location  = {Barcelona, Spain},
  pages     = {92--102},
  numpages  = {11},
  url       = {https://doi.org/10.1145/2884045.2884047},
  doi       = {10.1145/2884045.2884047},
  acmid     = {2884047},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {GPGPU, resource management, stencil computations, tiling},
}


% arXiv preprint record exported from DBLP. Authors use the unambiguous
% "Last, First" form and the acronym in the title is braced against
% sentence-casing.
@article{DBLP:journals/corr/abs-1805-02566,
  author        = {Kwon, Hyoukjun and
                   Pellauer, Michael and
                   Krishna, Tushar},
  title         = {Understanding Reuse, Performance, and Hardware Cost of {DNN} Dataflows: A Data-Centric Approach},
  journal       = {CoRR},
  volume        = {abs/1805.02566},
  year          = {2018},
  url           = {https://arxiv.org/abs/1805.02566},
  archivePrefix = {arXiv},
  eprint        = {1805.02566},
  timestamp     = {Mon, 13 Aug 2018 16:46:45 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1805-02566},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}


% The accented author name uses a brace-wrapped LaTeX accent command so
% classic 8-bit BibTeX sorts and labels it correctly.
@inproceedings{Stock:2014:FED:2594291.2594342,
  author    = {Stock, Kevin and Kong, Martin and Grosser, Tobias and Pouchet, Louis-No{\"e}l and Rastello, Fabrice and Ramanujam, J. and Sadayappan, P.},
  title     = {A Framework for Enhancing Data Reuse via Associative Reordering},
  booktitle = {Proceedings of the 35th ACM SIGPLAN Conference on Programming Language Design and Implementation},
  series    = {PLDI '14},
  year      = {2014},
  isbn      = {978-1-4503-2784-8},
  location  = {Edinburgh, United Kingdom},
  pages     = {65--76},
  numpages  = {12},
  url       = {https://doi.org/10.1145/2594291.2594342},
  doi       = {10.1145/2594291.2594342},
  acmid     = {2594342},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

% IEEE Xplore export, reformatted one field per line. The page range uses an
% en-dash (double hyphen), month uses the standard three-letter macro, and the
% accelerator's proper name is braced against sentence-casing.
@article{7738524,
  author  = {Chen, Yu-Hsin and Krishna, Tushar and Emer, Joel S. and Sze, Vivienne},
  title   = {{Eyeriss}: An Energy-Efficient Reconfigurable Accelerator for Deep Convolutional Neural Networks},
  journal = {IEEE Journal of Solid-State Circuits},
  year    = {2017},
  volume  = {52},
  number  = {1},
  pages   = {127--138},
  month   = jan,
  issn    = {0018-9200},
  doi     = {10.1109/JSSC.2016.2616357},
  url     = {https://ieeexplore.ieee.org/document/7738524/},
}

@article{Vasilache:2019:NAL:3366460.3355606,
 author = {Vasilache, Nicolas and Zinenko, Oleksandr and Theodoridis, Theodoros and Goyal, Priya and Devito, Zachary and Moses, William S. and Verdoolaege, Sven and Adams, Andrew and Cohen, Albert},
 title = {The Next 700 Accelerated Layers: From Mathematical Expressions of Network Computation Graphs to Accelerated GPU Kernels, Automatically},
 journal = {ACM Trans. Archit. Code Optim.},
 issue_date = {October 2019},
 volume = {16},
 number = {4},
 month = oct,
 year = {2019},
 issn = {1544-3566},
 pages = {38:1--38:26},
 articleno = {38},
 numpages = {26},
 url = {http://doi.acm.org/10.1145/3355606},
 doi = {10.1145/3355606},
 acmid = {3355606},
 publisher = {ACM},
 address = {New York, NY, USA},
 keywords = {Deep learning layers, GPU acceleration, polyhedral compilation},