3D-DRESD Lorenzo Pavesi
-
Upload
usrdresd -
Category
Technology
-
view
244 -
download
1
Transcript of 3D-DRESD Lorenzo Pavesi
Università Milano Bicocca
Studio di Tecniche di compilazione parallela
per architetture riconfigurabili
Pavesi Lorenzo
071042
Agenda
• Processore ibrido XiRisc
• PiCoGa e GriffyC
• Suif
• Compilatore per PiCoGa
• Risultati sperimentali
Hybrid Processors
● Semplice core non più sufficiente
– Incremento delle performance
– Riduzione dei consumi (potenza, area)
● Core Configurabili
– Specializzazione ISA (Xtensa Tensilica, ARC)
– Ideali per applicazioni di Digital Signal Processing e Bit Level Manipulation
● Sviluppi futuri: Core riconfigurabile
– GarpChip, ASH
XiRisc+PiCoGa e GriffyC
• Microcontrollore RISC 32bit
• Architettura VLIW a 2 issues
• Pipeline a 5 stadi
• ISA Configurabile
• Componente riconfigurabile
- PiCoGa 16x24 RLC
• GriffyC superset Ansi-C
- Stile DFG
PGAop
+ +
+
A B C D
Y
A B C D
Y
● DFG
● Multi contesto (4 configurazione, 1 esecuzione)
GriffyC
L1 : sub a,a,2 rol b,b,a add d,d,a add c,b,d add i,i,1 bnz c,L1
sub a,a,2
add d,d,a rol b,b,a
add c,b,d
add i,i,1
A
D
IB
L1 : sub a,2 rol b,a add d,a add c,b,d add i,i,1 bnz c,L1
PGAop a,b,d,i
[..]
for(;c!=0;i++){ a=a-2; b=b<<a; d=d+a; c=b+d;
} [..]
[..]
PD_0=pga_allocate(myPGAop); [..]
for(;c!=0;i++){ pgadirect1(PD_0,a,i,b,d);
} [..]
pga_deallocate(myPGAop); [..]
SUIF
• Infrastruttura per compilatori
– ( http://suif.stanford.edu/ )
• Orientata alla ricerca e sviluppo
• Passi di compilazione modulari
• Sistema estendibile
Suifdriver
Pass
- analyses
- optimization
IR
- suifnodes
- basicnodes
Kernel
- suifkernel
- iokernel
MODULES
Machine SUIF
Optimization &
Analysis Algorithms
O P I
Target Machines
Compilation Environment (SUIF)
• Permette la costruzione di “back ends”
• Machine level intermediate forms
• Descrizione architettura target
Suif (v.2.1)
Machine SUIF-IR (qui è definito machine ir.hoof file)
OPIcfa
bvdmachine
cfg ssa
suifvm
x86 alphacma / ssa
picovm
ksta
ex1
m2gc
Parametrized
Target dependent
Compilation
Environment
is defined
Str.Anl
Flusso di compilazione per PiCoGA
C to SUIF
LIR
MACHINE-SUIF
CFG
STRUCTURAL ANALYSIS
KERNEL IDENTIFICATION
• Innermost while-region;
• “PiCoGa basic block” marking;
• selezione di sub-trees while-region contenenti solo PiCoGa Basic Block;
1
2
3
PiCoGa Kernel translation
• SSA representation
• Cti Cmove replacement
• Independent from Identification
- manually selected kernels translation
GRIFFY–C COMPILER
• Kernel ranking
• Kernel encapsulation
KERNEL EXTRACTION
Generazione del GriffyC
....................
#ifndef PICOHEADER__provaTmp1
#define PICOHEADER__provaTmp1
#pragma fpga _provaTmp1 0x00 0 0
{
/* Virtual register declarations */
void * _vr0;
double _vr1;
float _vr2;
_vr4 = (float (*)[1])part_amplitude;
_vr5 = (float *)_vr4;
_vr6 = (float *)((char *)_vr5 + _vr3);
_vr7 = *_vr6;
_vr2 = (float)_vr7;
_vr1 = (double)_vr2;
printf(_vr0, i, _vr1);
}
#pragma end
#endif /*PICOHEADER__provaTmp1*/
......
........
......
#ifndef PICOHEADER__provaTmp1
#define PICOHEADER__provaTmp1
#pragma fpga _provaTmp1 0x00 0 0
{
/* Virtual register declarations */
void * _vr0;
double _vr1;
float _vr2;
_vr4 = (float (*)[1])part_amplitude;
_vr5 = (float *)_vr4;
_vr6 = (float *)((char *)_vr5 + _vr3);
_vr7 = *_vr6;
_vr2 = (float)_vr7;
_vr1 = (double)_vr2;
printf(_vr0, i, _vr1);
}
#pragma end
#endif /*PICOHEADER__provaTmp1*/
......
........
......
#ifndef PICOHEADER__provaTmp1
#define PICOHEADER__provaTmp1
#pragma fpga _provaTmp1 0x00 0 0
{
/* Virtual register declarations */
void * _vr0;
double _vr1;
float _vr2;
_vr4 = (float (*)[1])part_amplitude;
_vr5 = (float *)_vr4;
_vr6 = (float *)((char *)_vr5 + _vr3);
_vr7 = *_vr6;
_vr2 = (float)_vr7;
_vr1 = (double)_vr2;
printf(_vr0, i, _vr1);
}
#pragma end
#endif /*PICOHEADER__provaTmp1*/
CSUIF
SUIF(LIR)
Dismantling delle strutture di controllo
FileSetBlock
FileBlock
procedure
procedure
procedure
FileBlock
procedure
procedure
MachineSUIF
CFG
picovm
Generazione del GriffyC
ControlTree
ANNOTATED
Mach – SUIF
....................
#ifndef PICOHEADER__provaTmp1
#define PICOHEADER__provaTmp1
#pragma fpga _provaTmp1 0x00 0 0
{
/* Virtual register declarations */
void * _vr0;
double _vr1;
float _vr2;
_vr4 = (float (*)[1])part_amplitude;
_vr5 = (float *)_vr4;
_vr6 = (float *)((char *)_vr5 + _vr3);
_vr7 = *_vr6;
_vr2 = (float)_vr7;
_vr1 = (double)_vr2;
printf(_vr0, i, _vr1);
}
#pragma end
#endif /*PICOHEADER__provaTmp1*/
PICOHEADER
......
........
......
#ifndef PICOHEADER__provaTmp1
#define PICOHEADER__provaTmp1
#pragma fpga _provaTmp1 0x00 0 0
{
/* Virtual register declarations */
void * _vr0;
double _vr1;
float _vr2;
_vr4 = (float (*)[1])part_amplitude;
_vr5 = (float *)_vr4;
_vr6 = (float *)((char *)_vr5 + _vr3);
_vr7 = *_vr6;
_vr2 = (float)_vr7;
_vr1 = (double)_vr2;
printf(_vr0, i, _vr1);
}
#pragma end
#endif /*PICOHEADER__provaTmp1*/
......
........
......
#ifndef PICOHEADER__provaTmp1
#define PICOHEADER__provaTmp1
#pragma fpga _provaTmp1 0x00 0 0
{
/* Virtual register declarations */
void * _vr0;
double _vr1;
float _vr2;
_vr4 = (float (*)[1])part_amplitude;
_vr5 = (float *)_vr4;
_vr6 = (float *)((char *)_vr5 + _vr3);
_vr7 = *_vr6;
_vr2 = (float)_vr7;
_vr1 = (double)_vr2;
printf(_vr0, i, _vr1);
}
#pragma end
#endif /*PICOHEADER__provaTmp1*/
......
........
......
#ifndef PICOHEADER__provaTmp1
#define PICOHEADER__provaTmp1
#pragma fpga _provaTmp1 0x00 0 0
{
/* Virtual register declarations */
void * _vr0;
double _vr1;
float _vr2;
_vr4 = (float (*)[1])part_amplitude;
_vr5 = (float *)_vr4;
_vr6 = (float *)((char *)_vr5 + _vr3);
_vr7 = *_vr6;
_vr2 = (float)_vr7;
_vr1 = (double)_vr2;
printf(_vr0, i, _vr1);
}
#pragma end
#endif /*PICOHEADER__provaTmp1*/
FileSetBlock
FileBlock
procedure
procedure
procedure
FileBlock
procedure
procedure
kernel
Ottimizzazioni sul tipo di selezione
ottimizzazioni sul body del kernel
Selezione
2
3
Ranking & Estrazione
SSA M2GC
......
........
......
#ifndef PICOHEADER__provaTmp1
#define PICOHEADER__provaTmp1
#pragma fpga _provaTmp1 0x00 0 0
{
/* Virtual register declarations */
void * _vr0;
double _vr1;
float _vr2;
_vr4 = (float (*)[1])part_amplitude;
_vr5 = (float *)_vr4;
_vr6 = (float *)((char *)_vr5 + _vr3);
_vr7 = *_vr6;
_vr2 = (float)_vr7;
_vr1 = (double)_vr2;
printf(_vr0, i, _vr1);
}
#pragma end
#endif /*PICOHEADER__provaTmp1*/
......
........
......
#ifndef PICOHEADER__provaTmp1
#define PICOHEADER__provaTmp1
#pragma fpga _provaTmp1 0x00 0 0
{
/* Virtual register declarations */
void * _vr0;
double _vr1;
float _vr2;
_vr4 = (float (*)[1])part_amplitude;
_vr5 = (float *)_vr4;
_vr6 = (float *)((char *)_vr5 + _vr3);
_vr7 = *_vr6;
_vr2 = (float)_vr7;
_vr1 = (double)_vr2;
printf(_vr0, i, _vr1);
}
#pragma end
#endif /*PICOHEADER__provaTmp1*/
......
........
......
#ifndef PICOHEADER__provaTmp1
#define PICOHEADER__provaTmp1
#pragma fpga _provaTmp1 0x00 0 0
{
/* Virtual register declarations */
void * _vr0;
double _vr1;
float _vr2;
_vr4 = (float (*)[1])part_amplitude;
_vr5 = (float *)_vr4;
_vr6 = (float *)((char *)_vr5 + _vr3);
_vr7 = *_vr6;
_vr2 = (float)_vr7;
_vr1 = (double)_vr2;
printf(_vr0, i, _vr1);
}
#pragma end
#endif /*PICOHEADER__provaTmp1*/
Structural Analysis
1X
Test e Risultati
• Applicazioni di codifica video
– iDCT, quantizzazione
Block division
DCT
Storage
DCT Quantize Entropy Encoder
IDCT Entropy Decoder Immagine
Reconstruct
Dequantize
originale
Immagine
Test e Risultati
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
0
2,5
5
7,5
10
12,5
15
17,5
20
22,5
25
27,5
30
32,5
35
37,5
40
Gauge
gauge
Percentuale
0
1,3
9,54
10,8
38,2
39,5
Conclusioni
● Realizzazione di un flusso di compilazione completo
– Buon numero di kernel identificati
– Kernel di medie-piccole dimensioni
● Prototipo stabile e sufficientemente efficiente
Sviluppi Futuri
● Strategie di selezione più evolute
● Integrazione con il compilatore FastGriffy
● Nuovi passi di ottimizzazione
● Analisi Interprocedurali
– Incremento della dimensione media dei kernel accelerabili
● Aggiornamento alle evoluzioni del PiCoGa
Domande?