% NOTE(review): the original record names two venues at once, tech report
% IISc-CSA-TR-2014-3 (Nov 2014) and ACM TOPLAS (Feb 2015); the url points at
% the tech report PDF. Kept as a misc entry with the venue text verbatim until
% one venue is chosen -- confirm which version should be cited.
@misc{cohenautomatic,
  author       = {Bhaskaracharya, Somashekaracharya G. and Bondhugula, Uday and Cohen, Albert},
  title        = {Automatic Intra-Array Storage Optimization},
  howpublished = {IISc-CSA-TR-2014-3, Nov 2014 and ACM TOPLAS, Feb 2015},
  year         = {2015},
  url          = {http://www.csa.iisc.ernet.in/TR/2014/3/paper.pdf},
}

@article{kong,
  author  = {Kong, Martin and Pop, Antoniu and Pouchet, Louis-No{\"e}l and Govindarajan, R. and Cohen, Albert and Sadayappan, P.},
  title   = {Compiler/Runtime Framework for Dynamic Dataflow Parallelization of Tiled Programs},
  journal = {ACM TACO},
  month   = jan,
  year    = {2015},
  url     = {http://dl.acm.org/citation.cfm?id=2687652},
}

@article{roshan,
  author  = {Dathathri, Roshan and Mullapudi, Ravi Teja and Bondhugula, Uday},
  title   = {Compiling Affine Loop Nests for a Dynamic Scheduling Runtime on Shared and Distributed Memory},
  journal = {ACM TOPC},
  year    = {2016},
  url     = {http://drona.csa.iisc.ernet.in/%7euday/publications/dynamic-scheduling-topc-draft.pdf},
}

@article{Peter,
  author  = {Kling, Peter and Pietrzyk, Peter},
  title   = {Profitable Scheduling on Multiple Speed-Scalable Processors},
  journal = {ACM TOPC},
  month   = oct,
  year    = {2015},
  url     = {http://dl.acm.org/citation.cfm?id=2809872},
}

@article{Ravishankar:2014,
  author     = {Ravishankar, Mahesh and Eisenlohr, John and Pouchet, Louis-No{\"e}l and Ramanujam, J. and Rountev, Atanas and Sadayappan, P.},
  title      = {Automatic Parallelization of a Class of Irregular Loops for Distributed Memory Systems},
  journal    = {ACM TOPC},
  issue_date = {September 2014},
  volume     = {1},
  number     = {1},
  month      = oct,
  year       = {2014},
  issn       = {2329-4949},
  pages      = {7:1--7:37},
  articleno  = {7},
  numpages   = {37},
  url        = {http://doi.acm.org/10.1145/2660251},
  doi        = {10.1145/2660251},
  acmid      = {2660251},
  publisher  = {ACM},
  address    = {New York, NY, USA},
}

@article{Remote,
  author    = {Hoefler, Torsten and Dinan, James and Thakur, Rajeev and Barrett, Brian and Balaji, Pavan and Gropp, William and Underwood, Keith},
  title     = {Remote Memory Access Programming in {MPI-3}},
  journal   = {ACM TOPC},
  month     = jul,
  year      = {2015},
  articleno = {9},
  url       = {http://dl.acm.org/citation.cfm?doid=2798443.2780584},
}

@article{Noise,
  author    = {Hammouda, Adam and Siegel, Andrew R. and Siegel, Stephen F.},
  title     = {Noise-Tolerant Explicit Stencil Computations for Nonuniform Process Execution Rates},
  journal   = {ACM TOPC},
  month     = may,
  year      = {2015},
  articleno = {7},
  url       = {http://dl.acm.org/citation.cfm?id=2742351},
}

@article{Torus,
  author    = {Sack, Paul and Gropp, William},
  title     = {Collective Algorithms for Multiported Torus Networks},
  journal   = {ACM TOPC},
  month     = jan,
  year      = {2015},
  articleno = {12},
  url       = {http://dl.acm.org/citation.cfm?id=2686882},
}

@article{Avoid,
  author    = {Ballard, Grey and Demmel, James and Knight, Nicholas},
  title     = {Avoiding Communication in Successive Band Reduction},
  journal   = {ACM TOPC},
  month     = jan,
  year      = {2015},
  articleno = {11},
  url       = {http://dl.acm.org/citation.cfm?id=2686877},
}

% NOTE(review): this url (id=2686877) is identical to the one on the "Avoid"
% entry, which looks like a copy-paste slip -- confirm the correct ACM DL id.
@article{Fault,
  author    = {Bouteiller, Aurelien and Herault, Thomas and Bosilca, George and Du, Peng and Dongarra, Jack},
  title     = {Algorithm-Based Fault Tolerance for Dense Matrix Factorizations, Multiple Failures and Accuracy},
  journal   = {ACM TOPC},
  month     = jan,
  year      = {2015},
  articleno = {10},
  url       = {http://dl.acm.org/citation.cfm?id=2686877},
}

@article{Low-rank,
  author    = {Maleki, Saeed and Musuvathi, Madanlal and Mytkowicz, Todd},
  title     = {Low-Rank Methods for Parallelizing Dynamic Programming Algorithms},
  journal   = {ACM TOPC},
  month     = feb,
  year      = {2016},
  articleno = {26},
  url       = {http://dl.acm.org/citation.cfm?id=2884065},
}

@inproceedings{Cache,
  author    = {Yotov, Kamen and Roeder, Tom and Pingali, Keshav and Gunnels, John and Gustavson, Fred},
  title     = {An experimental comparison of cache-oblivious and cache-conscious programs},
  booktitle = {{SPAA}},
  pages     = {93--104},
  year      = {2007},
  url       = {http://dl.acm.org/citation.cfm?id=1248394},
}

@article{Graph,
  author    = {Merrill, Duane and Garland, Michael and Grimshaw, Andrew},
  title     = {High-Performance and Scalable {GPU} Graph Traversal},
  journal   = {ACM TOPC},
  month     = jan,
  year      = {2015},
  articleno = {14},
  url       = {http://dl.acm.org/citation.cfm?id=2717511},
}

@article{TLP,
  author    = {Yiapanis, Paraskevas and Brown, Gavin and Lujan, Mikel},
  title     = {Compiler-Driven Software Speculation for Thread-Level Parallelism},
  journal   = {ACM TOPLAS},
  month     = jan,
  year      = {2016},
  articleno = {5},
  url       = {http://dl.acm.org/citation.cfm?id=2821505},
}

@article{PARSECS,
  author    = {Chasapis, Dimitrios and Casas, Marc and Moreto, Miquel and Vidal, Raul and Ayguade, Eduard and Labarta, Jesus and Valero, Mateo},
  title     = {{PARSECSs}: Evaluating the Impact of Task Parallelism in the {PARSEC} Benchmark Suite},
  journal   = {ACM TACO},
  month     = jan,
  year      = {2016},
  articleno = {41},
  url       = {http://dl.acm.org/citation.cfm?id=2829952},
}

@article{Nonlinear,
  author    = {Sukumaran-Rajam, Aravind and Clauss, Philippe},
  title     = {The Polyhedral Model of Nonlinear Loops},
  journal   = {ACM TACO},
  month     = jan,
  year      = {2016},
  articleno = {48},
  url       = {http://dl.acm.org/citation.cfm?id=2838734},
}

@article{Interleaved,
  author    = {Anderson, Andrew and Malik, Avinash and Gregg, David},
  title     = {Automatic Vectorization of Interleaved Data Revisited},
  journal   = {ACM TACO},
  month     = jan,
  year      = {2016},
  articleno = {50},
  url       = {http://dl.acm.org/citation.cfm?id=2838735},
}

@article{R-GPU,
  author    = {van den Braak, Gert-Jan and Corporaal, Henk},
  title     = {{R-GPU}: A Reconfigurable {GPU} Architecture},
  journal   = {ACM TACO},
  month     = mar,
  year      = {2016},
  articleno = {12},
  url       = {http://dl.acm.org/citation.cfm?id=2890506},
}

@inproceedings{SIMD,
  author    = {Chen, Linchuan and Jiang, Peng and Agrawal, Gagan},
  title     = {Exploiting recent {SIMD} architectural advances for irregular applications},
  booktitle = {{CGO}},
  pages     = {47--58},
  year      = {2016},
  url       = {http://dl.acm.org/citation.cfm?id=2854046},
}

@inproceedings{FPGA,
  author    = {Fukumoto, Kota and Shibata, Yuichiro and Oguri, Kiyoshi},
  title     = {Performance modeling and optimization of {3-D} stencil computation on a stream-based {FPGA} accelerator},
  booktitle = {International Conference on Reconfigurable Computing and {FPGAs} ({ReConFig})},
  pages     = {1--6},
  year      = {2013},
  doi       = {10.1109/ReConFig.2013.6732318},
}

@article{CMP,
  author    = {Luque, Carlos and Moreto, Miquel and Cazorla, Francisco J. and Valero, Mateo},
  title     = {Fair {CPU} time accounting in {CMP+SMT} processors},
  journal   = {ACM TACO},
  month     = jan,
  year      = {2013},
  articleno = {50},
  doi       = {10.1145/2400682.2400709},
}

@article{Compiler,
  author    = {Zhou, Hao and Xue, Jingling},
  title     = {A Compiler Approach for Exploiting Partial {SIMD} Parallelism},
  journal   = {ACM TACO},
  month     = apr,
  year      = {2016},
  articleno = {11},
  doi       = {10.1145/2886101},
}