DSPRelated.com
Forums

Secondary bootloader Using DMA

Started by kingalbertv1 April 24, 2008
TI provides an example of a secondary bootloader that copies code from Flash
to SRAM and SDRAM. It seems that the code copies word by word from
flash to SRAM and SDRAM. Isn't it more efficient to use DMA to move
big blocks? How does one do this if DMA is to be used instead of
copying word by word?

;* ------------------- File: boot.asm -----------------------------------*
;*
;* Secondary (second-level) flash boot loader for the C6713 DSK.
;* Target: TI TMS320C671x, TI assembler syntax (labels in column 1,
;* mnemonics and directives indented, "||" marks a parallel instruction).
;*
;* Flow, as implemented below:
;*   1. Program the EMIF registers with the EMIF_*_V values. Both the
;*      register addresses and the values come from boot_c671x.h62.
;*   2. Walk the linker-generated copy table at ___binit__ and copy each
;*      record from its load address to its run address, one byte at a
;*      time.
;*   3. Branch to the C entry point _c_int00.
;*
;* Copy table layout as consumed by this code:
;*   word 0      : number of records in the upper 16 bits (the lower
;*                 16 bits hold the record size, which is not used here)
;*   each record : load address, run address, size in bytes (3 words)
;*
;* Register usage:
;*   A3 = copy table read pointer      B1 = records remaining
;*   B4 = source (load) pointer        B0 = bytes remaining in record
;*   A4 = destination (run) pointer    B5 = byte being copied
;*   A4/B4 are also the address/value scratch pair during EMIF setup.
;*
;* Scheduling: the code is hand scheduled. Loads have 4 delay slots and
;* branches have 5, so the NOP counts and the order of the instructions
;* that follow each load/branch must not be changed casually.
;*
;* Changes relative to the listing as originally posted:
;*   - comment text that had been wrapped onto separate lines is rejoined
;*     and the code is indented so that it assembles;
;*   - a record whose size is 0 is skipped instead of wrapping the byte
;*     counter to 0xFFFFFFFF;
;*   - the record count is extracted with an unsigned shift.
;* ----------------------------------------------------------------------*
        .title  "Flash bootup utility for 6713 dsk"
        .option D,T
        .length 102
        .width  140

; global EMIF symbols defined for the c671x family
        .include boot_c671x.h62

        .sect   ".boot_load"
        .global _boot
        .global ___binit__

        .ref    _c_int00

_boot:
;************************************************************************
;* DEBUG LOOP - the branch below is commented out for normal operation.
;* With B1 cleared, re-enabling the branch makes the loader spin at
;* _myloop (presumably until B1 is changed from a debugger).
;************************************************************************

        zero    B1
_myloop:                                ; [!B1] B _myloop
        nop     5
_myloopend:
        nop

;************************************************************************
;* CONFIGURE EMIF
;* Each step builds a 32-bit register address in A4 and a 32-bit value
;* in B4 (mvkl/mvkh pairs issued in parallel) and stores the value.
;************************************************************************

;****************************************************************
; *EMIF_GCTL = EMIF_GCTL_V;
;****************************************************************
        mvkl    EMIF_GCTL, A4
||      mvkl    EMIF_GCTL_V, B4

        mvkh    EMIF_GCTL, A4
||      mvkh    EMIF_GCTL_V, B4

        stw     B4, *A4

;****************************************************************
; *EMIF_CE0 = EMIF_CE0_V
;****************************************************************
        mvkl    EMIF_CE0, A4
||      mvkl    EMIF_CE0_V, B4

        mvkh    EMIF_CE0, A4
||      mvkh    EMIF_CE0_V, B4

        stw     B4, *A4

;****************************************************************
; *EMIF_CE1 = EMIF_CE1_V (setup for 8-bit async)
;****************************************************************
        mvkl    EMIF_CE1, A4
||      mvkl    EMIF_CE1_V, B4

        mvkh    EMIF_CE1, A4
||      mvkh    EMIF_CE1_V, B4

        stw     B4, *A4

;****************************************************************
; *EMIF_CE2 = EMIF_CE2_V (setup for 32-bit async)
;****************************************************************
        mvkl    EMIF_CE2, A4
||      mvkl    EMIF_CE2_V, B4

        mvkh    EMIF_CE2, A4
||      mvkh    EMIF_CE2_V, B4

        stw     B4, *A4

;****************************************************************
; *EMIF_CE3 = EMIF_CE3_V (setup for 32-bit async)
;****************************************************************
        mvkl    EMIF_CE3, A4
||      mvkl    EMIF_CE3_V, B4

        mvkh    EMIF_CE3, A4
||      mvkh    EMIF_CE3_V, B4

        stw     B4, *A4

;****************************************************************
; *EMIF_SDRAMCTL = EMIF_SDRAMCTL_V
;****************************************************************
        mvkl    EMIF_SDRAMCTL, A4
||      mvkl    EMIF_SDRAMCTL_V, B4

        mvkh    EMIF_SDRAMCTL, A4
||      mvkh    EMIF_SDRAMCTL_V, B4

        stw     B4, *A4

;****************************************************************
; *EMIF_SDRAMTIM = EMIF_SDRAMTIM_V
;****************************************************************
        mvkl    EMIF_SDRAMTIM, A4
||      mvkl    EMIF_SDRAMTIM_V, B4

        mvkh    EMIF_SDRAMTIM, A4
||      mvkh    EMIF_SDRAMTIM_V, B4

        stw     B4, *A4

;****************************************************************
; *EMIF_SDRAMEXT = EMIF_SDRAMEXT_V
;****************************************************************
        mvkl    EMIF_SDRAMEXT, A4
||      mvkl    EMIF_SDRAMEXT_V, B4

        mvkh    EMIF_SDRAMEXT, A4
||      mvkh    EMIF_SDRAMEXT_V, B4

        stw     B4, *A4

;************************************************************************
;* COPY SECTIONS
;* Copy sections using the linker generated copy table.
;* ___binit__ is the start address of the linker generated copy table.
;************************************************************************

        mvkl    ___binit__, a3          ; load copy table pointer
        mvkh    ___binit__, a3

        ldw     *a3++, b0               ; first word of copy table (record sz / count)
        nop     4                       ; load delay slots
        shru    b0, 16, b1              ; upper half-word = number of records;
                                        ; unsigned shift so the count is never negative

copy_section_top:
        ldw     *a3++, b4               ; B4 = flash start (load) address
        ldw     *a3++, a4               ; A4 = ram start (run) address
        ldw     *a3++, b0               ; B0 = byte count (size)
        nop     2

 [!b1]  b       copy_done               ; have we copied all sections?
        nop     5                       ; branch delay slots; the three loads
                                        ; above complete during these cycles

 [!b0]  b       copy_next               ; size 0: nothing to copy. Without this
        nop     5                       ; test the counter would wrap below.

copy_loop:
        ldb     *B4++, B5               ; fetch next byte from the load address
        sub     b0, 1, b0               ; decrement byte counter
 [b0]   b       copy_loop               ; set up branch if not done; the next
                                        ; three statements are its delay slots
        nop     2                       ; B5 arrives after these two cycles

        stb     B5, *A4++               ; store byte to the run address
        nop     2

copy_next:
        b       copy_section_top        ; on to the next record
        sub     b1, 1, b1               ; decrement record counter (delay slot)
        nop     4

;************************************************************************
;* jump to _c_int00
;************************************************************************
copy_done:
        mvkl    .S2 _c_int00, B0
        mvkh    .S2 _c_int00, B0
        b       .S2 B0
        nop     5

;; .boot_load itself does not need to be copied; that happens
;; automatically upon DSP reset.
kingalbertv1-

> TI showing example of secondary bootloader to copy code from Flash
> to SRAM and SDRAM. It seems that the code copy word by word from
> flash to SRAM and SDRAM. Isn't it more efficient to use DMA to move
> big blocks. How does one do this if DMA is to be used instead of
> copying word by word.

Since boot occurs one-time only, I doubt the authors of the secondary bootloader
cared much about efficiency. They would have cared more about easy-to-read code that
helps document the secondary bootload process, which is a bit complex.

But go ahead, knock yourself out, put in DMA support :-) If your manager asks what
you're doing, just say "learning" otherwise you can't justify the wasted time.

-Jeff

> ;* ------------------- File: boot.asm -------------------------------
> ----*
> .title "Flash bootup utility for 6713 dsk"
> .option D,T
> .length 102
> .width 140
>
> ; global EMIF symbols defined for the c671x family
> .include boot_c671x.h62
>
> .sect ".boot_load"
> .global _boot
> .global ___binit__
>
> .ref _c_int00
>
> _boot:
> ;********************************************************************
> ****
> ;* DEBUG LOOP - COMMENT OUT B FOR NORMAL OPERATION
> ;********************************************************************
> ****
>
> zero B1
> _myloop: ; [!B1] B _myloop
> nop 5
> _myloopend: nop
>
> ;********************************************************************
> ****
> ;* CONFIGURE EMIF
> ;********************************************************************
> ****
>
> ;************************************************************
> ****
> ; *EMIF_GCTL = EMIF_GCTL_V;
> ;************************************************************
> ****
> mvkl EMIF_GCTL,A4
> || mvkl EMIF_GCTL_V,B4
>
> mvkh EMIF_GCTL,A4
> || mvkh EMIF_GCTL_V,B4
>
> stw B4,*A4
>
> ;************************************************************
> ****
> ; *EMIF_CE0 = EMIF_CE0_V
> ;************************************************************
> ****
> mvkl EMIF_CE0,A4
> || mvkl EMIF_CE0_V,B4
>
> mvkh EMIF_CE0,A4
> || mvkh EMIF_CE0_V,B4
>
> stw B4,*A4
>
> ;************************************************************
> ****
> ; *EMIF_CE1 = EMIF_CE1_V (setup for 8-bit async)
> ;************************************************************
> ***
> mvkl EMIF_CE1,A4
> || mvkl EMIF_CE1_V,B4
>
> mvkh EMIF_CE1,A4
> || mvkh EMIF_CE1_V,B4
>
> stw B4,*A4
>
> ;************************************************************
> ****
> ; *EMIF_CE2 = EMIF_CE2_V (setup for 32-bit async)
> ;************************************************************
> ****
> mvkl EMIF_CE2,A4
> || mvkl EMIF_CE2_V,B4
>
> mvkh EMIF_CE2,A4
> || mvkh EMIF_CE2_V,B4
>
> stw B4,*A4
>
> ;************************************************************
> ****
> ; *EMIF_CE3 = EMIF_CE3_V (setup for 32-bit async)
> ;************************************************************
> ****
> mvkl EMIF_CE3,A4
> || mvkl EMIF_CE3_V,B4 ;
>
> mvkh EMIF_CE3,A4
> || mvkh EMIF_CE3_V,B4
>
> stw B4,*A4
>
> ;************************************************************
> ****
> ; *EMIF_SDRAMCTL = EMIF_SDRAMCTL_V
> ;************************************************************
> ****
> mvkl EMIF_SDRAMCTL,A4
> || mvkl EMIF_SDRAMCTL_V,B4 ;
>
> mvkh EMIF_SDRAMCTL,A4
> || mvkh EMIF_SDRAMCTL_V,B4
>
> stw B4,*A4
>
> ;************************************************************
> ****
> ; *EMIF_SDRAMTIM = EMIF_SDRAMTIM_V
> ;************************************************************
> ****
> mvkl EMIF_SDRAMTIM,A4
> || mvkl EMIF_SDRAMTIM_V,B4 ;
>
> mvkh EMIF_SDRAMTIM,A4
> || mvkh EMIF_SDRAMTIM_V,B4
>
> stw B4,*A4
>
> ;************************************************************
> ****
> ; *EMIF_SDRAMEXT = EMIF_SDRAMEXT_V
> ;************************************************************
> ****
> mvkl EMIF_SDRAMEXT,A4
> || mvkl EMIF_SDRAMEXT_V,B4 ;
>
> mvkh EMIF_SDRAMEXT,A4
> || mvkh EMIF_SDRAMEXT_V,B4
>
> stw B4,*A4
>
> ; copy sections using linker generated copy table.
> ; ___binit__ is the start address of the linker generated copy table
>
> mvkl ___binit__, a3 ; load copy table pointer
> mvkh ___binit__, a3
>
> ldw *a3++, b0 ; load first word of copy table
> (record sz)
> nop 4
> shr b0, 16, b1 ; right shift to get the count (num
> records)
>
> copy_section_top:
> ldw *a3++, b4 ; load flash start (load) address
> ldw *a3++, a4 ; load ram start (run) address
> ldw *a3++, b0 ; byte count (size)
> nop 2
>
> [!b1] b copy_done ; have we copied all sections?
> nop 5
>
> copy_loop:
> ldb *B4++,B5
> sub b0,1,b0 ; decrement counter
> [b0] b copy_loop ; setup branch if not done
> nop 2
>
> stb B5,*A4++
> nop 2
>
> b copy_section_top
> sub b1,1,b1 ; decrement counter
> nop 4
>
> ; jump to _c_int00
> copy_done:
> mvkl .S2 _c_int00, B0
> mvkh .S2 _c_int00, B0
> b .S2 B0
> nop 5
>
> ;; .boot_load don't need to copy, will happen automatically upon
> dsp reset
Truong-
> In your application maybe, but in mine the requirement is extremely stringent for
> the time line. So no, my management will spend all the money needed to do it; there
> is no waste here.

That I doubt. During boot, you don't need the CPU and DMA to share onchip internal
mem busses -- which is the point of DMA. Also:

-transfer time will be limited by the access time
of the Flash, which is an async device

-L2 cache won't help since every SDRAM location
is a one-time write during boot

Clearly the boot process has time limitation factors that DMA cannot meaningfully
reduce.

You need to take some time to understand the purpose of DMA and fully understand all
of the various chip architectures you are working with. If your manager who is going
to spend "all the money" is reading this he's going to wonder about how you spend
your time and also your communication skills. Take my word for that as a manager.

-Jeff

PS. Please post to the group, not to me.

> Jeff Brower wrote:
>
> kingalbertv1-
>
> > TI showing example of secondary bootloader to copy code from Flash
> > to SRAM and SDRAM. It seems that the code copy word by word from
> > flash to SRAM and SDRAM. Isn't it more efficient to use DMA to move
> > big blocks. How does one do this if DMA is to be used instead of
> > copying word by word.
>
> Since boot occurs one-time only, I doubt the authors of the secondary
> bootloader
> cared much about efficiency. They would have cared more about
> easy-to-read code that
> helps document the secondary bootload process, which is a bit complex.
>
> But go ahead, knock yourself out, put in DMA support :-) If your manager
> asks what
> you're doing, just say "learning" otherwise you can't justify the wasted
> time.
>
> -Jeff
>
> > ;* ------------------- File: boot.asm -------------------------------
> > ----*
> > .title "Flash bootup utility for 6713 dsk"
> > .option D,T
> > .length 102
> > .width 140
> >
> > ; global EMIF symbols defined for the c671x family
> > .include boot_c671x.h62
> >
> > .sect ".boot_load"
> > .global _boot
> > .global ___binit__
> >
> > .ref _c_int00
> >
> > _boot:
> > ;********************************************************************
> > ****
> > ;* DEBUG LOOP - COMMENT OUT B FOR NORMAL OPERATION
> > ;********************************************************************
> > ****
> >
> > zero B1
> > _myloop: ; [!B1] B _myloop
> > nop 5
> > _myloopend: nop
> >
> > ;********************************************************************
> > ****
> > ;* CONFIGURE EMIF
> > ;********************************************************************
> > ****
> >
> > ;************************************************************
> > ****
> > ; *EMIF_GCTL = EMIF_GCTL_V;
> > ;************************************************************
> > ****
> > mvkl EMIF_GCTL,A4
> > || mvkl EMIF_GCTL_V,B4
> >
> > mvkh EMIF_GCTL,A4
> > || mvkh EMIF_GCTL_V,B4
> >
> > stw B4,*A4
> >
> > ;************************************************************
> > ****
> > ; *EMIF_CE0 = EMIF_CE0_V
> > ;************************************************************
> > ****
> > mvkl EMIF_CE0,A4
> > || mvkl EMIF_CE0_V,B4
> >
> > mvkh EMIF_CE0,A4
> > || mvkh EMIF_CE0_V,B4
> >
> > stw B4,*A4
> >
> > ;************************************************************
> > ****
> > ; *EMIF_CE1 = EMIF_CE1_V (setup for 8-bit async)
> > ;************************************************************
> > ***
> > mvkl EMIF_CE1,A4
> > || mvkl EMIF_CE1_V,B4
> >
> > mvkh EMIF_CE1,A4
> > || mvkh EMIF_CE1_V,B4
> >
> > stw B4,*A4
> >
> > ;************************************************************
> > ****
> > ; *EMIF_CE2 = EMIF_CE2_V (setup for 32-bit async)
> > ;************************************************************
> > ****
> > mvkl EMIF_CE2,A4
> > || mvkl EMIF_CE2_V,B4
> >
> > mvkh EMIF_CE2,A4
> > || mvkh EMIF_CE2_V,B4
> >
> > stw B4,*A4
> >
> > ;************************************************************
> > ****
> > ; *EMIF_CE3 = EMIF_CE3_V (setup for 32-bit async)
> > ;************************************************************
> > ****
> > mvkl EMIF_CE3,A4
> > || mvkl EMIF_CE3_V,B4 ;
> >
> > mvkh EMIF_CE3,A4
> > || mvkh EMIF_CE3_V,B4
> >
> > stw B4,*A4
> >
> > ;************************************************************
> > ****
> > ; *EMIF_SDRAMCTL = EMIF_SDRAMCTL_V
> > ;************************************************************
> > ****
> > mvkl EMIF_SDRAMCTL,A4
> > || mvkl EMIF_SDRAMCTL_V,B4 ;
> >
> > mvkh EMIF_SDRAMCTL,A4
> > || mvkh EMIF_SDRAMCTL_V,B4
> >
> > stw B4,*A4
> >
> > ;************************************************************
> > ****
> > ; *EMIF_SDRAMTIM = EMIF_SDRAMTIM_V
> > ;************************************************************
> > ****
> > mvkl EMIF_SDRAMTIM,A4
> > || mvkl EMIF_SDRAMTIM_V,B4 ;
> >
> > mvkh EMIF_SDRAMTIM,A4
> > || mvkh EMIF_SDRAMTIM_V,B4
> >
> > stw B4,*A4
> >
> > ;************************************************************
> > ****
> > ; *EMIF_SDRAMEXT = EMIF_SDRAMEXT_V
> > ;************************************************************
> > ****
> > mvkl EMIF_SDRAMEXT,A4
> > || mvkl EMIF_SDRAMEXT_V,B4 ;
> >
> > mvkh EMIF_SDRAMEXT,A4
> > || mvkh EMIF_SDRAMEXT_V,B4
> >
> > stw B4,*A4
> >
> > ; copy sections using linker generated copy table.
> > ; ___binit__ is the start address of the linker generated copy table
> >
> > mvkl ___binit__, a3 ; load copy table pointer
> > mvkh ___binit__, a3
> >
> > ldw *a3++, b0 ; load first word of copy table
> > (record sz)
> > nop 4
> > shr b0, 16, b1 ; right shift to get the count (num
> > records)
> >
> > copy_section_top:
> > ldw *a3++, b4 ; load flash start (load) address
> > ldw *a3++, a4 ; load ram start (run) address
> > ldw *a3++, b0 ; byte count (size)
> > nop 2
> >
> > [!b1] b copy_done ; have we copied all sections?
> > nop 5
> >
> > copy_loop:
> > ldb *B4++,B5
> > sub b0,1,b0 ; decrement counter
> > [b0] b copy_loop ; setup branch if not done
> > nop 2
> >
> > stb B5,*A4++
> > nop 2
> >
> > b copy_section_top
> > sub b1,1,b1 ; decrement counter
> > nop 4
> >
> > ; jump to _c_int00
> > copy_done:
> > mvkl .S2 _c_int00, B0
> > mvkh .S2 _c_int00, B0
> > b .S2 B0
> > nop 5
> >
> > ;; .boot_load don't need to copy, will happen automatically upon
> > dsp reset
>
> ------------
> Be a better friend, newshound, and know-it-all with Yahoo! Mobile. Try it now.
Truong-
> Obviously, the boot device is DMA the first 1K so I don't really think you
> understand enough about DMA

The reason EDMA is used for the first 1k is there are no instructions yet; i.e. no
CPU. Note also that the 1k/secondary boot method is not used on newer C6x devices
that contain onchip bootloader ROM (641x, 645x, etc). If you study the source code
for TI's bootloader ROMs, you will not see DMA being used. I guess that ought to
tell you something, at least if you consider TI the experts.

Again, what you should be learning is the *overall* architecture of the chip and
software. It seems like someone assigned you DMA as a Sr. Project and somehow you
got focused in on DMA. You shouldn't worry about DMA just because it "sounds good"
or you read it somewhere. Only implement DMA when necessary -- when it gives you a
meaningful performance increase.

-Jeff

PS. Please post to the group, not to me.
> Jeff Brower wrote:
>
> Truong-
> > In your application maybe, but in mine the requirement is extremely
> > stringent for the time line. So no my management will spend all the
> > money needed to do it no waste here.
> That I doubt. During boot, you don't need the CPU and DMA to share
> onchip internal mem busses -- which is the point of DMA. Also:
>
> -transfer time will be limited by the access time
> of the Flash, which is an async device
>
> -L2 cache won't help since every SDRAM location
> is a one-time write during boot
>
> Clearly the boot process has time limitation factors that DMA cannot
> meaningfully reduce.
>
> You need to take some time to understand the purpose of DMA and fully
> understand all of the various chip architectures you are working with.
> If your manager who is going to spend "all the money" is reading this
> he's going to wonder about how you spend your time and also your
> communication skills. Take my word for that as a manager.
>
> -Jeff
>
> PS. Please post to the group, not to me.
>
> > Jeff Brower wrote:
> >
> > kingalbertv1-> TI showing example of secondary bootloader to
> > copy code from Flash
> > > to SRAM and SDRAM. It seems that the code copy word by word
> > from
> > > flash to SRAM and SDRAM. Isn't it more efficient to use DMA
> > to move
> > > big blocks. How does one do this if DMA is to be used
> > instead of
> > > copying word by word.Since boot occurs one-time only, I
> > doubt the authors of the secondary bootloader
> > cared much about efficiency. They would have cared more about
> > easy-to-read code that
> > helps document the secondary bootload process, which is a bit
> > complex.But go ahead, knock yourself out, put in DMA support
> > :-) If your manager asks what
> > you're doing, just say "learning" otherwise you can't justify
> > the wasted time.-Jeff> ;* ------------------- File: boot.asm
> > -------------------------------
> > > ----*
> > > .title "Flash bootup utility for 6713 dsk"
> > > .option D,T
> > > .length 102
> > > .width 140
> > >
> > > ; global EMIF symbols defined for the c671x family
> > > .include boot_c671x.h62
> > >
> > > .sect ".boot_load"
> > > .global _boot
> > > .global ___binit__
> > >
> > > .ref _c_int00
> > >
> > > _boot:
> > >
> > ;********************************************************************
> >
> > > ****
> > > ;* DEBUG LOOP - COMMENT OUT B FOR NORMAL OPERATION
> > >
> > ;********************************************************************
> >
> > > ****
> > >
> > > zero B1
> > > _myloop: ; [!B1] B _myloop
> > > nop 5
> > > _myloopend: nop
> > >
> > >
> > ;********************************************************************
> >
> > > ****
> > > ;* CONFIGURE EMIF
> > >
> > ;********************************************************************
> >
> > > ****
> > >
> > >
> > ;************************************************************
> >
> > > ****
> > > ; *EMIF_GCTL = EMIF_GCTL_V;
> > >
> > ;************************************************************
> >
> > > ****
> > > mvkl EMIF_GCTL,A4
> > > || mvkl EMIF_GCTL_V,B4
> > >
> > > mvkh EMIF_GCTL,A4
> > > || mvkh EMIF_GCTL_V,B4
> > >
> > > stw B4,*A4
> > >
> > >
> > ;************************************************************
> >
> > > ****
> > > ; *EMIF_CE0 = EMIF_CE0_V
> > >
> > ;************************************************************
> >
> > > ****
> > > mvkl EMIF_CE0,A4
> > > || mvkl EMIF_CE0_V,B4
> > >
> > > mvkh EMIF_CE0,A4
> > > || mvkh EMIF_CE0_V,B4
> > >
> > > stw B4,*A4
> > >
> > >
> > ;************************************************************
> >
> > > ****
> > > ; *EMIF_CE1 = EMIF_CE1_V (setup for 8-bit async)
> > >
> > ;************************************************************
> >
> > > ***
> > > mvkl EMIF_CE1,A4
> > > || mvkl EMIF_CE1_V,B4
> > >
> > > mvkh EMIF_CE1,A4
> > > || mvkh EMIF_CE1_V,B4
> > >
> > > stw B4,*A4
> > >
> > >
> > ;************************************************************
> >
> > > ****
> > > ; *EMIF_CE2 = EMIF_CE2_V (setup for 32-bit async)
> > >
> > ;************************************************************
> >
> > > ****
> > > mvkl EMIF_CE2,A4
> > > || mvkl EMIF_CE2_V,B4
> > >
> > > mvkh EMIF_CE2,A4
> > > || mvkh EMIF_CE2_V,B4
> > >
> > > stw B4,*A4
> > >
> > >
> > ;************************************************************
> >
> > > ****
> > > ; *EMIF_CE3 = EMIF_CE3_V (setup for 32-bit async)
> > >
> > ;************************************************************
> >
> > > ****
> > > mvkl EMIF_CE3,A4
> > > || mvkl EMIF_CE3_V,B4 ;
> > >
> > > mvkh EMIF_CE3,A4
> > > || mvkh EMIF_CE3_V,B4
> > >
> > > stw B4,*A4
> > >
> > >
> > ;************************************************************
> >
> > > ****
> > > ; *EMIF_SDRAMCTL = EMIF_SDRAMCTL_V
> > >
> > ;************************************************************
> >
> > > ****
> > > mvkl EMIF_SDRAMCTL,A4
> > > || mvkl EMIF_SDRAMCTL_V,B4 ;
> > >
> > > mvkh EMIF_SDRAMCTL,A4
> > > || mvkh EMIF_SDRAMCTL_V,B4
> > >
> > > stw B4,*A4
> > >
> > >
> > ;************************************************************
> >
> > > ****
> > > ; *EMIF_SDRAMTIM = EMIF_SDRAMTIM_V
> > >
> > ;************************************************************
> >
> > > ****
> > > mvkl EMIF_SDRAMTIM,A4
> > > || mvkl EMIF_SDRAMTIM_V,B4 ;
> > >
> > > mvkh EMIF_SDRAMTIM,A4
> > > || mvkh EMIF_SDRAMTIM_V,B4
> > >
> > > stw B4,*A4
> > >
> > >
> > ;************************************************************
> >
> > > ****
> > > ; *EMIF_SDRAMEXT = EMIF_SDRAMEXT_V
> > >
> > ;************************************************************
> >
> > > ****
> > > mvkl EMIF_SDRAMEXT,A4
> > > || mvkl EMIF_SDRAMEXT_V,B4 ;
> > >
> > > mvkh EMIF_SDRAMEXT,A4
> > > || mvkh EMIF_SDRAMEXT_V,B4
> > >
> > > stw B4,*A4
> > >
> > > ; copy sections using linker generated copy table.
> > > ; ___binit__ is the start address of the linker generated
> > copy table
> > >
> > > mvkl ___binit__, a3 ; load copy table pointer
> > > mvkh ___binit__, a3
> > >
> > > ldw *a3++, b0 ; load first word of copy table
> > > (record sz)
> > > nop 4
> > > shr b0, 16, b1 ; right shift to get the count (num
> > > records)
> > >
> > > copy_section_top:
> > > ldw *a3++, b4 ; load flash start (load) address
> > > ldw *a3++, a4 ; load ram start (run) address
> > > ldw *a3++, b0 ; byte count (size)
> > > nop 2
> > >
> > > [!b1] b copy_done ; have we copied all sections?
> > > nop 5
> > >
> > > copy_loop:
> > > ldb *B4++,B5
> > > sub b0,1,b0 ; decrement counter
> > > [b0] b copy_loop ; setup branch if not done
> > > nop 2
> > >
> > > stb B5,*A4++
> > > nop 2
> > >
> > > b copy_section_top
> > > sub b1,1,b1 ; decrement counter
> > > nop 4
> > >
> > > ; jump to _c_int00
> > > copy_done:
> > > mvkl .S2 _c_int00, B0
> > > mvkh .S2 _c_int00, B0
> > > b .S2 B0
> > > nop 5
> > >
> > > ;; .boot_load don't need to copy, will happen automatically
> > upon
> > > dsp reset
> >
Truong-
> I sent it to you b/c your opinion has no benefit to the forum. You have no answer to
> my question, only that I should do it. So, I figure you have no clue how to do it.
> How does that benefit the forum?

Search the archives if you're learning DMA for an academic exercise. Or search the
dsprelated.com site for EDMA and QDMA.

For other students learning to program DSP: this guy is a great example of what not
to do. This is exactly how you receive no help and end up on an island. For this
guy, it's like the Internet never existed -- doesn't know how to use it, gets no
benefit from it, after 3 days he hasn't made any progress solving his problem.

-Jeff

PS. For the fourth time, post to the group not to individuals.
> Jeff Brower wrote:
>
> Truong-
> > Obviously, the boot device is DMA the first 1K so I don't really think
> > you understand enough about DMA
> The reason EDMA is used for the first 1k is there are no instructions
> yet; i.e. no CPU. Note also that the 1k/secondary boot method is not
> used on newer C6x devices that contain onchip bootloader ROM (641x, 645x,
> etc). If you study the source code for TI's bootloader ROMs, you will
> not see DMA being used. I guess that ought to tell you something, at
> least if you consider TI the experts.
>
> Again, what you should be learning is the *overall* architecture of the
> chip and software. It seems like someone assigned you DMA as a Sr.
> Project and somehow you got focused in on DMA. You shouldn't worry about
> DMA just because it "sounds good" or you read it somewhere. Only
> implement DMA when necessary -- when it gives you a meaningful
> performance increase.
>
> -Jeff
>
> PS. Please post to the group, not to me.
> > Jeff Brower wrote:
> >
> > Truong-
> >
> >
> > > In your application maybe, but in mine the requirement is
> > > extremely stringent for the time line. So no my management
> > > will spend all the money needed to do it no waste here.
> >
> >
> > That I doubt. During boot, you don't need the CPU and DMA to
> > share onchip internal mem busses -- which is the point of
> > DMA. Also: -transfer time will be limited by the access
> > time
> > of the Flash, which is an async device -L2 cache won't
> > help since every SDRAM location
> > is a one-time write during bootClearly the boot process
> > has time limitation factors that DMA cannot meaningfully
> > reduce.You need to take some time to understand the purpose
> > of DMA and fully understand all of the various chip
> > architectures you are working with. If your manager who is
> > going to spend "all the money" is reading this he's going to
> > wonder about how you spend your time and also your
> > communication skills. Take my word for that as a
> > manager.-JeffPS. Please post to the group, not to me.
> >
> >
> >
> > > Jeff Brower wrote:
> > >
> > > kingalbertv1-> TI showing example of secondary
> > > bootloader to copy code from Flash
> > > > to SRAM and SDRAM. It seems that the code copy
> > > word by word from
> > > > flash to SRAM and SDRAM. Isn't it more efficient
> > > to use DMA to move
> > > > big blocks. How does one do this if DMA is to be
> > > used instead of
> > > > copying word by word.Since boot occurs one-time
> > > only, I doubt the authors of the secondary
> > > bootloader
> > > cared much about efficiency. They would have cared
> > > more about easy-to-read code that
> > > helps document the secondary bootload process,
> > > which is a bit complex.But go ahead, knock
> > > yourself out, put in DMA support :-) If your
> > > manager asks what
> > > you're doing, just say "learning" otherwise you
> > > can't justify the wasted time.-Jeff> ;*
> > > ------------------- File: boot.asm ------------
> > > -------------------
> > > > ----*
> > > > .title "Flash bootup utility for 6713 dsk"
> > > > .option D,T
> > > > .length 102
> > > > .width 140
> > > >
> > > > ; global EMIF symbols defined for the c671x
> > > family
> > > > .include boot_c671x.h62
> > > >
> > > > .sect ".boot_load"
> > > > .global _boot
> > > > .global ___binit__
> > > >
> > > > .ref _c_int00
> > > >
> > > > _boot:
> > > > ;***********************************************
> > > *********************
> > > > ****
> > > > ;* DEBUG LOOP - COMMENT OUT B FOR NORMAL
> > > OPERATION
> > > > ;***********************************************
> > > *********************
> > > > ****
> > > >
> > > > zero B1
> > > > _myloop: ; [!B1] B _myloop
> > > > nop 5
> > > > _myloopend: nop
> > > >
> > > > ;***********************************************
> > > *********************
> > > > ****
> > > > ;* CONFIGURE EMIF
> > > > ;***********************************************
> > > *********************
> > > > ****
> > > >
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > ; *EMIF_GCTL = EMIF_GCTL_V;
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > mvkl EMIF_GCTL,A4
> > > > || mvkl EMIF_GCTL_V,B4
> > > >
> > > > mvkh EMIF_GCTL,A4
> > > > || mvkh EMIF_GCTL_V,B4
> > > >
> > > > stw B4,*A4
> > > >
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > ; *EMIF_CE0 = EMIF_CE0_V
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > mvkl EMIF_CE0,A4
> > > > || mvkl EMIF_CE0_V,B4
> > > >
> > > > mvkh EMIF_CE0,A4
> > > > || mvkh EMIF_CE0_V,B4
> > > >
> > > > stw B4,*A4
> > > >
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > ; *EMIF_CE1 = EMIF_CE1_V (setup for 8-bit async)
> > >
> > > > ;***********************************************
> > > *************
> > > > ***
> > > > mvkl EMIF_CE1,A4
> > > > || mvkl EMIF_CE1_V,B4
> > > >
> > > > mvkh EMIF_CE1,A4
> > > > || mvkh EMIF_CE1_V,B4
> > > >
> > > > stw B4,*A4
> > > >
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > ; *EMIF_CE2 = EMIF_CE2_V (setup for 32-bit
> > > async)
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > mvkl EMIF_CE2,A4
> > > > || mvkl EMIF_CE2_V,B4
> > > >
> > > > mvkh EMIF_CE2,A4
> > > > || mvkh EMIF_CE2_V,B4
> > > >
> > > > stw B4,*A4
> > > >
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > ; *EMIF_CE3 = EMIF_CE3_V (setup for 32-bit
> > > async)
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > mvkl EMIF_CE3,A4
> > > > || mvkl EMIF_CE3_V,B4 ;
> > > >
> > > > mvkh EMIF_CE3,A4
> > > > || mvkh EMIF_CE3_V,B4
> > > >
> > > > stw B4,*A4
> > > >
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > ; *EMIF_SDRAMCTL = EMIF_SDRAMCTL_V
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > mvkl EMIF_SDRAMCTL,A4
> > > > || mvkl EMIF_SDRAMCTL_V,B4 ;
> > > >
> > > > mvkh EMIF_SDRAMCTL,A4
> > > > || mvkh EMIF_SDRAMCTL_V,B4
> > > >
> > > > stw B4,*A4
> > > >
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > ; *EMIF_SDRAMTIM = EMIF_SDRAMTIM_V
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > mvkl EMIF_SDRAMTIM,A4
> > > > || mvkl EMIF_SDRAMTIM_V,B4 ;
> > > >
> > > > mvkh EMIF_SDRAMTIM,A4
> > > > || mvkh EMIF_SDRAMTIM_V,B4
> > > >
> > > > stw B4,*A4
> > > >
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > ; *EMIF_SDRAMEXT = EMIF_SDRAMEXT_V
> > > > ;***********************************************
> > > *************
> > > > ****
> > > > mvkl EMIF_SDRAMEXT,A4
> > > > || mvkl EMIF_SDRAMEXT_V,B4 ;
> > > >
> > > > mvkh EMIF_SDRAMEXT,A4
> > > > || mvkh EMIF_SDRAMEXT_V,B4
> > > >
> > > > stw B4,*A4
> > > >
> > > > ; copy sections using linker generated copy
> > > table.
> > > > ; ___binit__ is the start address of the linker
> > > generated copy table
> > > >
> > > > mvkl ___binit__, a3 ; load copy table pointer
> > > > mvkh ___binit__, a3
> > > >
> > > > ldw *a3++, b0 ; load first word of copy table
> > > > (record sz)
> > > > nop 4
> > > > shr b0, 16, b1 ; right shift to get the count
> > > (num
> > > > records)
> > > >
> > > > copy_section_top:
> > > > ldw *a3++, b4 ; load flash start (load) address
> > > > ldw *a3++, a4 ; load ram start (run) address
> > > > ldw *a3++, b0 ; byte count (size)
> > > > nop 2
> > > >
> > > > [!b1] b copy_done ; have we copied all sections?
> > >
> > > > nop 5
> > > >
> > > > copy_loop:
> > > > ldb *B4++,B5
> > > > sub b0,1,b0 ; decrement counter
> > > > [b0] b copy_loop ; setup branch if not done
> > > > nop 2
> > > >
> > > > stb B5,*A4++
> > > > nop 2
> > > >
> > > > b copy_section_top
> > > > sub b1,1,b1 ; decrement counter
> > > > nop 4
> > > >
> > > > ; jump to _c_int00
> > > > copy_done:
> > > > mvkl .S2 _c_int00, B0
> > > > mvkh .S2 _c_int00, B0
> > > > b .S2 B0
> > > > nop 5
> > > >
> > > > ;; .boot_load don't need to copy, will happen
> > > automatically upon
> > > > dsp reset
> > >
DMA is not faster than the CPU. It just lets the CPU do something else, and at the boot stage the CPU doesn't have much to do.
But at the end of the day, if you want to access external memory, the CPU and the DMA both have to issue their requests through the same channel.
This is why TI engineers did not do it with DMA but rather with the CPU.
This is why Jeff is trying to explain to you what people who know the processor recommend for a bootloader.

A DMA copy needs half a day to develop; what is your result?
Did you try to optimise the ASM routines as well?
What is your processor frequency at that stage and what is the speed for accessing external memory?

To: k...@yahoo.com
CC: c...
From: j...@signalogic.com
Date: Fri, 25 Apr 2008 07:59:05 -0500
Subject: Re: [c6x] Secondary bootloader Using DMA
Truong-
I sent it to you b/c your opinion has no benefit to the forum. You have no answer to my question, only that I should do it. So, I figure you have no clue how to do it. How does that benefit the forum?
Search the archives if you're learning DMA for an academic exercise. Or search the dsprelated.com site for EDMA and QDMA.
For other students learning to program DSP: this guy is a great example of what not to do. This is exactly how you receive no help and end up on an island. For this guy, it's like the Internet never existed -- doesn't know how to use it, gets no benefit from it, after 3 days he hasn't made any progress solving his problem.
-Jeff
PS. For the fourth time, post to the group not to individuals.
Jeff Brower wrote:

Truong-
Obviously, the boot device is DMA the first 1K so I don't really think you understand enough about DMA.
The reason EDMA is used for the first 1k is there are no instructions yet; i.e. no CPU. Note also that the 1k/secondary boot method is not used on newer C6x devices that contain onchip bootloader ROM (641x, 645x, etc). If you study the source code for TI's bootloader ROMs, you will not see DMA being used. I guess that ought to tell you something, at least if you consider TI the experts.
Again, what you should be learning is the *overall* architecture of the chip and software. It seems like someone assigned you DMA as a Sr. Project and somehow you got focused in on DMA. You shouldn't worry about DMA just because it "sounds good" or you read it somewhere. Only implement DMA when necessary -- when it gives you a meaningful performance increase.
-Jeff
PS. Please post to the group, not to me.
Jeff Brower wrote:
Truong-
In your application maybe, but in mine the requirement is extremely stringent for the timeline. So no, my management will spend all the money needed to do it; no waste here.
That I doubt. During boot, you don't need the CPU and DMA to share onchip internal mem busses -- which is the point of DMA. Also:
- transfer time will be limited by the access time of the Flash, which is an async device
- L2 cache won't help since every SDRAM location is a one-time write during boot
Clearly the boot process has time limitation factors that DMA cannot meaningfully reduce.
You need to take some time to understand the purpose of DMA and fully understand all of the various chip architectures you are working with. If your manager who is going to spend "all the money" is reading this he's going to wonder about how you spend your time and also your communication skills. Take my word for that as a manager.
-Jeff
PS. Please post to the group, not to me.
Jeff Brower wrote:
kingalbertv1-> TI showing example of secondary bootloader to copy code from Flash > to SRAM and SDRAM. It seems that the code copy word by word from > flash to SRAM and SDRAM. Isn't it more efficient to use DMA to move > big blocks. How does one do this if DMA is to be used instead of > copying word by word.Since boot occurs one-time only, I doubt the authors of the secondary bootloader cared much about efficiency. They would have cared more about easy-to-read code that helps document the secondary bootload process, which is a bit complex.But go ahead, knock yourself out, put in DMA support :-) If your manager asks what you're doing, just say "learning" otherwise you can't justify the wasted time.-Jeff> ;* ------------------- File: boot.asm ------------------------------- > ----* > .title "Flash bootup utility for 6713 dsk" > .option D,T > .length 102 > .width 140 > > ; global EMIF symbols defined for the c671x family > .include boot_c671x.h62 > > .sect ".boot_load" > .global _boot > .global ___binit__ > > .ref _c_int00 > > _boot: > ;******************************************************************** > **** > ;* DEBUG LOOP - COMMENT OUT B FOR NORMAL OPERATION > ;******************************************************************** > **** > > zero B1 > _myloop: ; [!B1] B _myloop > nop 5 > _myloopend: nop > > ;******************************************************************** > **** > ;* CONFIGURE EMIF > ;******************************************************************** > **** > > ;************************************************************ > **** > ; *EMIF_GCTL = EMIF_GCTL_V; > ;************************************************************ > **** > mvkl EMIF_GCTL,A4 > || mvkl EMIF_GCTL_V,B4 > > mvkh EMIF_GCTL,A4 > || mvkh EMIF_GCTL_V,B4 > > stw B4,*A4 > > ;************************************************************ > **** > ; *EMIF_CE0 = EMIF_CE0_V > ;************************************************************ > **** > mvkl EMIF_CE0,A4 > || mvkl EMIF_CE0_V,B4 
> > mvkh EMIF_CE0,A4 > || mvkh EMIF_CE0_V,B4 > > stw B4,*A4 > > ;************************************************************ > **** > ; *EMIF_CE1 = EMIF_CE1_V (setup for 8-bit async) > ;************************************************************ > *** > mvkl EMIF_CE1,A4 > || mvkl EMIF_CE1_V,B4 > > mvkh EMIF_CE1,A4 > || mvkh EMIF_CE1_V,B4 > > stw B4,*A4 > > ;************************************************************ > **** > ; *EMIF_CE2 = EMIF_CE2_V (setup for 32-bit async) > ;************************************************************ > **** > mvkl EMIF_CE2,A4 > || mvkl EMIF_CE2_V,B4 > > mvkh EMIF_CE2,A4 > || mvkh EMIF_CE2_V,B4 > > stw B4,*A4 > > ;************************************************************ > **** > ; *EMIF_CE3 = EMIF_CE3_V (setup for 32-bit async) > ;************************************************************ > **** > mvkl EMIF_CE3,A4 > || mvkl EMIF_CE3_V,B4 ; > > mvkh EMIF_CE3,A4 > || mvkh EMIF_CE3_V,B4 > > stw B4,*A4 > > ;************************************************************ > **** > ; *EMIF_SDRAMCTL = EMIF_SDRAMCTL_V > ;************************************************************ > **** > mvkl EMIF_SDRAMCTL,A4 > || mvkl EMIF_SDRAMCTL_V,B4 ; > > mvkh EMIF_SDRAMCTL,A4 > || mvkh EMIF_SDRAMCTL_V,B4 > > stw B4,*A4 > > ;************************************************************ > **** > ; *EMIF_SDRAMTIM = EMIF_SDRAMTIM_V > ;************************************************************ > **** > mvkl EMIF_SDRAMTIM,A4 > || mvkl EMIF_SDRAMTIM_V,B4 ; > > mvkh EMIF_SDRAMTIM,A4 > || mvkh EMIF_SDRAMTIM_V,B4 > > stw B4,*A4 > > ;************************************************************ > **** > ; *EMIF_SDRAMEXT = EMIF_SDRAMEXT_V > ;************************************************************ > **** > mvkl EMIF_SDRAMEXT,A4 > || mvkl EMIF_SDRAMEXT_V,B4 ; > > mvkh EMIF_SDRAMEXT,A4 > || mvkh EMIF_SDRAMEXT_V,B4 > > stw B4,*A4 > > ; copy sections using linker generated copy table. 
> ; ___binit__ is the start address of the linker generated copy table > > mvkl ___binit__, a3 ; load copy table pointer > mvkh ___binit__, a3 > > ldw *a3++, b0 ; load first word of copy table > (record sz) > nop 4 > shr b0, 16, b1 ; right shift to get the count (num > records) > > copy_section_top: > ldw *a3++, b4 ; load flash start (load) address > ldw *a3++, a4 ; load ram start (run) address > ldw *a3++, b0 ; byte count (size) > nop 2 > > [!b1] b copy_done ; have we copied all sections? > nop 5 > > copy_loop: > ldb *B4++,B5 > sub b0,1,b0 ; decrement counter > [b0] b copy_loop ; setup branch if not done > nop 2 > > stb B5,*A4++ > nop 2 > > b copy_section_top > sub b1,1,b1 ; decrement counter > nop 4 > > ; jump to _c_int00 > copy_done: > mvkl .S2 _c_int00, B0 > mvkh .S2 _c_int00, B0 > b .S2 B0 > nop 5 > > ;; .boot_load don't need to copy, will happen automatically upon > dsp reset