User Tools

Site Tools


melange:papers:spring2017

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
Last revision Both sides next revision
melange:papers:spring2017 [2017/03/24 09:50]
prerana
melange:papers:spring2017 [2018/02/07 16:30]
prerana
Line 1: Line 1:
 +@inproceedings{Volkov:2008:BGT:1413370.1413402,
 + author = {Volkov, Vasily and Demmel, James W.},
 + title = {Benchmarking GPUs to Tune Dense Linear Algebra},
 + booktitle = {Proceedings of the 2008 ACM/IEEE Conference on Supercomputing},
 + series = {SC '08},
 + year = {2008},
 + isbn = {978-1-4244-2835-9},
 + location = {Austin, Texas},
 + pages = {31:1--31:11},
 + articleno = {31},
 + numpages = {11},
 + url = {http://dl.acm.org/citation.cfm?id=1413370.1413402},
 + acmid = {1413402},
 + publisher = {IEEE Press},
 + address = {Piscataway, NJ, USA},
 +}
 +
 +@Article{Griebl2000,
 +author="Griebl, Martin
 +and Feautrier, Paul
 +and Lengauer, Christian",
 +title="Index Set Splitting",
 +journal="International Journal of Parallel Programming",
 +year="2000",
 +month="Dec",
 +day="01",
 +volume="28",
 +number="6",
 +pages="607--631",
 +abstract="There are many algorithms for the space-time mapping of nested loops. Some of them even make the optimal choices within their framework. We propose a preprocessing phase for algorithms in the polytope model, which extends the model and yields space-time mappings whose schedule is, in some cases, orders of magnitude faster. These are cases in which the dependence graph has small irregularities. The basic idea is to split the index set of the loop nests into parts with a regular dependence structure and apply the existing space-time mapping algorithms to these parts individually. This work is based on a seminal idea in the more limited context of loop parallelization at the code level. We elevate the idea to the model level (our model is the polytope model), which increases its applicability by providing a clearer and wider range of choices at an acceptable analysis cost. Index set splitting is one facet in the effort to extend the power of the polytope model and to enable the generation of competitive target code.",
 +issn="1573-7640",
 +doi="10.1023/A:1007516818651",
 +url="https://doi.org/10.1023/A:1007516818651"
 +}
 +
 +@inproceedings{Irigoin:1988:SP:73560.73588,
 + author = {Irigoin, F. and Triolet, R.},
 + title = {Supernode Partitioning},
 + booktitle = {Proceedings of the 15th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},
 + series = {POPL '88},
 + year = {1988},
 + isbn = {0-89791-252-7},
 + location = {San Diego, California, USA},
 + pages = {319--329},
 + numpages = {11},
 + url = {http://doi.acm.org/10.1145/73560.73588},
 + doi = {10.1145/73560.73588},
 + acmid = {73588},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 +
 +
 +@ARTICLE{7738524, 
 +author={Y. H. Chen and T. Krishna and J. S. Emer and V. Sze}, 
 +journal={IEEE Journal of Solid-State Circuits}, 
 +title={Eyeriss: An Energy-Efficient Reconfigurable Accelerator for Deep Convolutional Neural Networks}, 
 +year={2017}, 
 +volume={52}, 
 +number={1}, 
 +pages={127-138}, 
 +keywords={DRAM chips;data flow computing;energy conservation;feedforward neural nets;learning (artificial intelligence);neural net architecture;power aware computing;reconfigurable architectures;AI systems;AlexNet;CNN shapes;DRAM accesses;Eyeriss;MAC;RS dataflow reconfiguration;accelerator chip;convolutional layers;data movement energy cost;dataflow processing;deep convolutional neural networks;energy efficiency;energy-efficient reconfigurable accelerator;multiply and accumulation;off-chip DRAM;reconfiguring architecture;row stationary;spatial architecture;Clocks;Computer architecture;Hardware;Neural networks;Random access memory;Shape;Throughput;Convolutional neural networks (CNNs);dataflow processing;deep learning;energy-efficient accelerators;spatial architecture}, 
 +doi={10.1109/JSSC.2016.2616357}, 
 +ISSN={0018-9200}, 
 +month={Jan},}
 +
 +@ARTICLE{88484, 
 +author={J. H. Saltz and R. Mirchandaney and K. Crowley}, 
 +journal={IEEE Transactions on Computers}, 
 +title={Run-time parallelization and scheduling of loops}, 
 +year={1991}, 
 +volume={40}, 
 +number={5}, 
 +pages={603-612}, 
 +keywords={parallel programming;scheduling;Encore Multimax;automatic parallelization;automatic scheduling;compile-time information;concurrently executable loop iterations;do loop;execution time preprocessing;executors;inspector procedures;loop dependency analysis;loop indexes;run-time methods;run-time reordering;source code loop structures;symbolic transformation rules;transformed versions;wavefronts;Computer science;Costs;Failure analysis;Level set;NASA;Parallel processing;Performance analysis;Processor scheduling;Runtime}, 
 +doi={10.1109/12.88484}, 
 +ISSN={0018-9340}, 
 +month={May},}
 +
 +BibTeX | EndNote | ACM Ref
 +@article{Feautrier:2006:SSS:1217445.1217447,
 + author = {Feautrier, Paul},
 + title = {Scalable and Structured Scheduling},
 + journal = {Int. J. Parallel Program.},
 + issue_date = {October 2006},
 + volume = {34},
 + number = {5},
 + month = oct,
 + year = {2006},
 + issn = {0885-7458},
 + pages = {459--487},
 + numpages = {29},
 + url = {http://dx.doi.org/10.1007/s10766-006-0011-4},
 + doi = {10.1007/s10766-006-0011-4},
 + acmid = {1217447},
 + publisher = {Kluwer Academic Publishers},
 + address = {Norwell, MA, USA},
 + keywords = {automatic parallelization, scalability, structured scheduling},
 +
 +
 +@article{Verdoolaege:2012:ECS:2362389.2362390,
 + author = {Verdoolaege, Sven and Janssens, Gerda and Bruynooghe, Maurice},
 + title = {Equivalence Checking of Static Affine Programs Using Widening to Handle Recurrences},
 + journal = {ACM Trans. Program. Lang. Syst.},
 + issue_date = {October 2012},
 + volume = {34},
 + number = {3},
 + month = nov,
 + year = {2012},
 + issn = {0164-0925},
 + pages = {11:1--11:35},
 + articleno = {11},
 + numpages = {35},
 + url = {http://doi.acm.org/10.1145/2362389.2362390},
 + doi = {10.1145/2362389.2362390},
 + acmid = {2362390},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {Commutativity, equivalence checking, polytope model, recurrences, widening},
 +}
 +
 +@inproceedings{Kulkarni:2007:OPR:1250734.1250759,
 + author = {Kulkarni, Milind and Pingali, Keshav and Walter, Bruce and Ramanarayanan, Ganesh and Bala, Kavita and Chew, L. Paul},
 + title = {Optimistic Parallelism Requires Abstractions},
 + booktitle = {Proceedings of the 28th ACM SIGPLAN Conference on Programming Language Design and Implementation},
 + series = {PLDI '07},
 + year = {2007},
 + isbn = {978-1-59593-633-2},
 + location = {San Diego, California, USA},
 + pages = {211--222},
 + numpages = {12},
 + url = {http://doi.acm.org/10.1145/1250734.1250759},
 + doi = {10.1145/1250734.1250759},
 + acmid = {1250759},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {abstractions, irregular programs, optimistic parallelism},
 +}
 +
 +@inproceedings{sanjay-fsttcs86,
 +        Address = {New Delhi, India},
 +        Author = {Rajopadhye, S. V. and Purushothaman, S. and Fujimoto, R. M.},
 +        Booktitle = {Proceedings, Sixth Conference on Foundations of Software
 +                  Technology and Theoretical Computer Science},
 +        Key = {Rajopadhye86b},
 +        Month = {December},
 +        Pages = {488-503},
 +        Publisher = {Springer Verlag, LNCS~241},
 +        Title = {On Synthesizing Systolic Arrays from Recurrence Equations
 +                  with Linear Dependencies},
 +        Year = {1986}}
 +
 +@article{quinton-jvsp89,
 +        Author = {Quinton, P. and {Van Dongen}, V.},
 +        Journal = {Journal of {VLSI} Signal Processing},
 +        Number = 2,
 +        Pages = {95-113},
 +        Publisher = {Kluwer Academic Publishers, Boston},
 +        Title = {The Mapping of Linear Recurrence Equations on Regular Arrays},
 +        Volume = 1,
 +        Year = 1989}
 +
 +@Article{feautrier91,
 +author=         "Feautrier, P.",
 +title=          "Dataflow analysis of array and scalar references",
 +journal=        "International Journal of Parallel Programming",
 +year=           1991,
 +volume=         20,
 +number=         1,
 +pages=          "23-53",
 +month=          "Feb",
 +annote=         "This article explains how a simple imperative language
 +        program (consisting only of assignments, for loops with affine loop
 +        limits, and arrays with affine index expressions), can be statically
 +        analyzed to find the flow dependencies."
 +}
 +
 +@article{feautrier92a,
 +  author    = {Paul Feautrier},
 +  title     = {Some Efficient Solutions to the Affine Scheduling Problem.
 +              {Part I}.  One-dimensional Time},
 +  journal   = {International Journal of Parallel Programming},
 +  volume    = {21},
 +  number    = {5},
 +  year      = {1992},
 +  pages     = {313-347},
 +  ee        = {http://dx.doi.org/10.1007/BF01407835}
 +}
 +
 +@article{feautrier92b,
 +  author    = {Paul Feautrier},
 +  title     = {Some Efficient Solutions to the Affine Scheduling Problem.
 +               {Part II}. Multidimensional Time},
 +  journal   = {International Journal of Parallel Programming},
 +  volume    = {21},
 +  number    = {6},
 +  year      = {1992},
 +  pages     = {389-420},
 +  ee        = {http://dx.doi.org/10.1007/BF01379404}
 +}
 +
 +@Article{collard-etal-fuzzy-jpdc-1997,
 +  author =       {Collard, J-F. and Barthou, D. and Feautrier, P.},
 +  title =        {Fuzzy Array Data Flow Analysis},
 +  journal =      {Journal of Parallel and Distributed Computing},
 +  year =         1997,
 +  volume =       40,
 +  number =       2,
 +  pages =        {210-226},
 +  month =        {Feb}}
 +
 +
 +@InProceedings{BPCB10,
 +  author =        {Benabderrahmane, M.-W. and Pouchet, L.-N. and Cohen A. and
 +                  Bastoul, C.},
 +  title =         {The Polyhedral Model Is More Widely Applicable Than You
 +                  Think},
 +  booktitle =     {Proceedings of the International Conference on Compiler
 +                  Construction ({ETAPS CC'10})},
 +  year =          2010,
 +  series =        {LNCS},
 +  address =       {Paphos, Cyprus},
 +  pages =         {283--303},
 +  month =         Mar,
 +  publisher =     {Springer-Verlag},
 +
 +
 +
 +@ARTICAL{123, author={Sanket Tavarageri, Albert Hartono, Muthu Baskaran, Louis-Noel Pouchet,J. Ramanujam
 +and P. Sadayappan}, title={Parametric Tiling of Affine Loop Nests}, year={2010}, doi = {http://web.cse.ohio-state.edu/~pouchet.2/doc/cpc-article.10.pdf} ,}
 +
 +
 +
 +
 +@INPROCEEDINGS{6043234, 
 +author={A. Pedram and A. Gerstlauer and R. A. v. d. Geijn}, 
 +booktitle={ASAP 2011 - 22nd IEEE International Conference on Application-specific Systems, Architectures and Processors}, 
 +title={A high-performance, low-power linear algebra core}, 
 +year={2011}, 
 +pages={35-42}, 
 +keywords={floating point arithmetic;matrix multiplication;GFLOPS-W;application-specific custom hardware;floating point operations per second;linear algebra core;matrix computations;matrix-matrix multiplication;power consumption reduction;technology scaling;Bandwidth;Computer architecture;Hardware;Kernel;Linear algebra;Program processors;Registers}, 
 +doi={10.1109/ASAP.2011.6043234}, 
 +ISSN={1063-6862}, 
 +month={Sept},}
 +
 +
 +
 @inproceedings{Bandishti:2012:TSC:2388996.2389051, @inproceedings{Bandishti:2012:TSC:2388996.2389051,
  author = {Bandishti, Vinayaka and Pananilath, Irshad and Bondhugula, Uday},  author = {Bandishti, Vinayaka and Pananilath, Irshad and Bondhugula, Uday},
melange/papers/spring2017.txt ยท Last modified: 2018/02/08 13:40 by prerana