initial commit
This commit is contained in:
27
BOARDS/arty.xdc
Normal file
27
BOARDS/arty.xdc
Normal file
@@ -0,0 +1,27 @@
|
||||
# Pin and timing constraints used by BOARDS/run_arty.sh (passed via --xdc).
# Every port is given a package location and the LVCMOS33 I/O standard;
# constraints are grouped per signal.

# --- CLK: clock input, constrained to a 10.0 ns period ---
set_property PACKAGE_PIN E3 [get_ports CLK]
set_property IOSTANDARD LVCMOS33 [get_ports CLK]
create_clock -period 10.0 [get_ports CLK]

# --- LEDS[0..3] ---
set_property PACKAGE_PIN H5 [get_ports LEDS[0]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[0]]
set_property PACKAGE_PIN J5 [get_ports LEDS[1]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[1]]
set_property PACKAGE_PIN T9 [get_ports LEDS[2]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[2]]
set_property PACKAGE_PIN T10 [get_ports LEDS[3]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[3]]

# --- UART ---
set_property LOC D10 [get_ports TXD]
set_property IOSTANDARD LVCMOS33 [get_ports TXD]
set_property LOC A9 [get_ports RXD]
set_property IOSTANDARD LVCMOS33 [get_ports RXD]

# --- RESET button ---
set_property LOC C2 [get_ports RESET]
set_property IOSTANDARD LVCMOS33 [get_ports RESET]
|
||||
|
29
BOARDS/cmod_a7.xdc
Normal file
29
BOARDS/cmod_a7.xdc
Normal file
@@ -0,0 +1,29 @@
|
||||
# Pin and timing constraints used by BOARDS/run_cmod_a7.sh (passed via --xdc).
# Every port is given a package location and the LVCMOS33 I/O standard;
# constraints are grouped per signal.

# --- CLK: clock input, constrained to an 83.33 ns period ---
set_property PACKAGE_PIN L17 [get_ports CLK]
set_property IOSTANDARD LVCMOS33 [get_ports CLK]
create_clock -period 83.33 [get_ports CLK]

# --- LEDS[0..4] ---
set_property PACKAGE_PIN A17 [get_ports LEDS[0]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[0]]
set_property PACKAGE_PIN C16 [get_ports LEDS[1]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[1]]
set_property PACKAGE_PIN B17 [get_ports LEDS[2]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[2]]
set_property PACKAGE_PIN B16 [get_ports LEDS[3]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[3]]
set_property PACKAGE_PIN C17 [get_ports LEDS[4]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[4]]

# --- UART ---
set_property LOC G17 [get_ports TXD]
set_property IOSTANDARD LVCMOS33 [get_ports TXD]
set_property LOC G19 [get_ports RXD]
set_property IOSTANDARD LVCMOS33 [get_ports RXD]

# --- RESET button ---
set_property LOC A18 [get_ports RESET]
set_property IOSTANDARD LVCMOS33 [get_ports RESET]
|
||||
|
35
BOARDS/ecp5_evn.lpf
Normal file
35
BOARDS/ecp5_evn.lpf
Normal file
@@ -0,0 +1,35 @@
|
||||
# Constraints used by BOARDS/run_ecp5evn.sh (passed via --lpf).
# Layout follows the ULX3S constraints file; see
# https://github.com/emard/ulx3s/blob/master/doc/constraints/ulx3s_v20.lpf
# Each port's site and buffer settings are kept next to each other.

## Clock (12 MHz) ################################

LOCATE COMP "CLK" SITE "A10";
IOBUF PORT "CLK" IO_TYPE=LVCMOS33;
FREQUENCY PORT "CLK" 12 MHZ;

## RESET button ##################################

LOCATE COMP "RESET" SITE "P4";
IOBUF PORT "RESET" IO_TYPE=LVCMOS33;

## LEDs ##########################################

LOCATE COMP "LEDS[0]" SITE "B17";
IOBUF PORT "LEDS[0]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[1]" SITE "A17";
IOBUF PORT "LEDS[1]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[2]" SITE "C17";
IOBUF PORT "LEDS[2]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[3]" SITE "B18";
IOBUF PORT "LEDS[3]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[4]" SITE "A18";
IOBUF PORT "LEDS[4]" IO_TYPE=LVCMOS33;

## UART (both lines pulled up) ###################

LOCATE COMP "TXD" SITE "D11";
IOBUF PORT "TXD" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
LOCATE COMP "RXD" SITE "D12";
IOBUF PORT "RXD" PULLMODE=UP IO_TYPE=LVCMOS33;
|
||||
|
13
BOARDS/icebreaker.pcf
Normal file
13
BOARDS/icebreaker.pcf
Normal file
@@ -0,0 +1,13 @@
|
||||
# Pin assignments used by BOARDS/run_icebreaker.sh (passed via --pcf).

# Clock and reset inputs
set_io CLK 35
set_io RESET 10

# LED outputs
set_io LEDS[0] 27
set_io LEDS[1] 21
set_io LEDS[2] 25
set_io LEDS[3] 23
set_io LEDS[4] 26

# UART
set_io TXD 9
set_io RXD 6
|
||||
|
21
BOARDS/icestick.pcf
Normal file
21
BOARDS/icestick.pcf
Normal file
@@ -0,0 +1,21 @@
|
||||
# Pin assignments used by BOARDS/run_icestick.sh and
# BOARDS/run_icestick_show.sh (passed via --pcf).

# Clock and reset inputs
set_io CLK 21
set_io RESET 47

# LED outputs
set_io LEDS[0] 99
set_io LEDS[1] 98
set_io LEDS[2] 97
set_io LEDS[3] 96
set_io LEDS[4] 95

# UART
set_io TXD 8
set_io RXD 9

# SPI flash clock and chip select
set_io SPIFLASH_CLK 70
set_io SPIFLASH_CS_N 71

# SPI flash data lines. Pins 67 and 68 are listed under two naming schemes
# (MOSI/MISO and IO[0]/IO[1]); a design is expected to use only one of them.
set_io SPIFLASH_MOSI 67
set_io SPIFLASH_MISO 68
set_io SPIFLASH_IO[0] 67
set_io SPIFLASH_IO[1] 68
|
18
BOARDS/run_arty.sh
Executable file
18
BOARDS/run_arty.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
# Build the SOC design for the Arty (xc7a35tcsg324-1) and load it:
#   yosys -> nextpnr-xilinx -> fasm2frames -> xc7frames2bit -> openFPGALoader
#
# Usage: BOARDS/run_arty.sh "<verilog sources>"
# The constraint file is referenced as BOARDS/arty.xdc, so run this from the
# directory that contains BOARDS/.
PROJECT_NAME=SOC
DB_DIR=/usr/share/nextpnr/prjxray-db
CHIPDB_DIR=/usr/share/nextpnr/xilinx-chipdb
PART=xc7a35tcsg324-1
VERILOGS=$1
BOARD_FREQ=100
CPU_FREQ=100

# Stop at the first failing step and echo every command as it runs.
set -ex

# Synthesis. ${VERILOGS} stays unquoted so that a space-separated list given
# as one argument is split into individual files.
yosys -DARTY -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ \
    -p "scratchpad -set xilinx_dsp.multonly 1" \
    -p "synth_xilinx -nowidelut -flatten -abc9 -arch xc7 -top SOC; write_json ${PROJECT_NAME}.json" \
    ${VERILOGS}

# Place and route.
nextpnr-xilinx --chipdb ${CHIPDB_DIR}/xc7a35t.bin --xdc BOARDS/arty.xdc \
    --json ${PROJECT_NAME}.json --write ${PROJECT_NAME}_routed.json \
    --fasm ${PROJECT_NAME}.fasm

# FASM -> frames -> bitstream.
fasm2frames --part ${PART} --db-root ${DB_DIR}/artix7 ${PROJECT_NAME}.fasm > ${PROJECT_NAME}.frames
xc7frames2bit --part_file ${DB_DIR}/artix7/${PART}/part.yaml --part_name ${PART} \
    --frm_file ${PROJECT_NAME}.frames --output_file ${PROJECT_NAME}.bit

# Load into SRAM:
openFPGALoader --board arty ${PROJECT_NAME}.bit
# To write to FLASH instead:
#openFPGALoader --board arty -f ${PROJECT_NAME}.bit
|
18
BOARDS/run_cmod_a7.sh
Executable file
18
BOARDS/run_cmod_a7.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
# Build the SOC design for the Cmod A7 (xc7a35tcpg236-1) and load it:
#   yosys -> nextpnr-xilinx -> fasm2frames -> xc7frames2bit -> openFPGALoader
#
# Usage: BOARDS/run_cmod_a7.sh "<verilog sources>"
# The constraint file is referenced as BOARDS/cmod_a7.xdc, so run this from
# the directory that contains BOARDS/.
PROJECT_NAME=SOC
DB_DIR=/usr/share/nextpnr/prjxray-db
CHIPDB_DIR=/usr/share/nextpnr/xilinx-chipdb
PART=xc7a35tcpg236-1
VERILOGS=$1
# NOTE(review): BOARDS/cmod_a7.xdc constrains CLK to an 83.33 ns period
# (about 12 MHz) while BOARD_FREQ below is 100 -- confirm which value the
# design expects for this board.
BOARD_FREQ=100
CPU_FREQ=100

# Stop at the first failing step and echo every command as it runs.
set -ex
# ${VERILOGS} stays unquoted so that a space-separated list given as one
# argument is split into individual files.
yosys -DCMODA7 -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "scratchpad -set xilinx_dsp.multonly 1" -p "synth_xilinx -nowidelut -flatten -abc9 -arch xc7 -top SOC; write_json ${PROJECT_NAME}.json" ${VERILOGS}
nextpnr-xilinx --chipdb ${CHIPDB_DIR}/xc7a35tcpg236-1.bin --xdc BOARDS/cmod_a7.xdc --json ${PROJECT_NAME}.json --write ${PROJECT_NAME}_routed.json --fasm ${PROJECT_NAME}.fasm
fasm2frames --part ${PART} --db-root ${DB_DIR}/artix7 ${PROJECT_NAME}.fasm > ${PROJECT_NAME}.frames
xc7frames2bit --part_file ${DB_DIR}/artix7/${PART}/part.yaml --part_name ${PART} --frm_file ${PROJECT_NAME}.frames --output_file ${PROJECT_NAME}.bit
# To send to SRAM. Loads the bitstream written by xc7frames2bit above; the
# file name must match --output_file, i.e. ${PROJECT_NAME}.bit.
openFPGALoader --freq 30e6 -c digilent --fpga-part xc7a35 ${PROJECT_NAME}.bit
#To send to FLASH:
# openFPGALoader --freq 30e6 -c digilent --fpga-part xc7a35tcpg236 -f ${PROJECT_NAME}.bit
|
13
BOARDS/run_ecp5evn.sh
Executable file
13
BOARDS/run_ecp5evn.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash
# Build the SOC design with the ecp5_evn constraints and program it:
#   yosys (synth_ecp5) -> nextpnr-ecp5 -> ecppack -> ujprog
#
# Usage: BOARDS/run_ecp5evn.sh "<verilog sources>"
# The constraint file is referenced as BOARDS/$BOARD.lpf, so run this from
# the directory that contains BOARDS/.
PROJECTNAME=SOC
BOARD=ecp5_evn
BOARD_FREQ=12
CPU_FREQ=100
FPGA_VARIANT=um5g-85k
FPGA_PACKAGE=CABGA381
VERILOGS=$1

# Fail early with a clear message instead of letting yosys run without input.
if [ -z "$VERILOGS" ]; then
    echo "usage: $0 \"<verilog sources>\"" >&2
    exit 1
fi

# Each step aborts the script with the failing tool's exit status.
# $VERILOGS stays unquoted so that a space-separated list given as one
# argument is split into individual files.
yosys -q -DECP5_EVN -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ecp5 -abc9 -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit
nextpnr-ecp5 --force --timing-allow-fail --json $PROJECTNAME.json --lpf BOARDS/$BOARD.lpf --textcfg $PROJECTNAME"_out".config --freq $BOARD_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE || exit
ecppack --compress --svf-rowsize 100000 --svf $PROJECTNAME".svf" $PROJECTNAME"_out.config" $PROJECTNAME".bit" || exit
# NOTE(review): the same ujprog invocation is used by run_ulx3s.sh; confirm
# it is the intended programmer for this board.
ujprog -j FLASH $PROJECTNAME".bit" || exit
|
||||
|
30
BOARDS/run_gowin.sh
Executable file
30
BOARDS/run_gowin.sh
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/bin/bash
# Build the SOC design for the Tang Primer 20K target and program it:
#   yosys (synth_gowin) -> nextpnr-himbaechel -> gowin_pack -> openFPGALoader
#
# Usage: BOARDS/run_gowin.sh "<verilog sources>"
# The constraint file is referenced as BOARDS/$BOARD.cst, so run this from
# the directory that contains BOARDS/.

# --- Settings ---
PROJECTNAME=SOC
DEVICE='GW2A-LV18PG256C8/I7'
BOARD='tangprimer20k'
BOARD_FREQ=27
CPU_FREQ=50
VERILOGS=$1

# --- Synthesis (yosys) ---
# The source list is read inside the yosys script, so $VERILOGS is expanded
# within the -p string rather than passed as file arguments.
yosys -q -DPRIMER20K -DBOARD_FREQ=${BOARD_FREQ} -DCPU_FREQ=${CPU_FREQ} -D INV_BTN=0 -p "
read_verilog $VERILOGS;
synth_gowin -top $PROJECTNAME -json $PROJECTNAME.json -family gw2a" || exit 1

# --- Place and route (nextpnr-himbaechel) ---
nextpnr-himbaechel \
    --json "${PROJECTNAME}.json" \
    --write "${PROJECTNAME}_pnr.json" \
    --device "${DEVICE}" \
    --vopt "cst=BOARDS/${BOARD}.cst" \
    --vopt family=GW2A-18 \
    --freq "${BOARD_FREQ}" || exit 1

# --- Bitstream packing (gowin_pack) ---
gowin_pack -d "${DEVICE}" -o "${PROJECTNAME}.fs" "${PROJECTNAME}_pnr.json" || exit 1

# --- Programming (openFPGALoader) ---
openFPGALoader -b "${BOARD}" "${PROJECTNAME}.fs" || exit 1
|
||||
|
14
BOARDS/run_icebreaker.sh
Executable file
14
BOARDS/run_icebreaker.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
# Build the SOC design for the icebreaker target and program it:
#   yosys (synth_ice40) -> nextpnr-ice40 -> icetime -> icepack -> iceprog
#
# Usage: BOARDS/run_icebreaker.sh "<verilog sources>"
# The constraint file is referenced as BOARDS/$BOARD.pcf, so run this from
# the directory that contains BOARDS/.
PROJECTNAME=SOC
BOARD=icebreaker
BOARD_FREQ=12
CPU_FREQ=20
FPGA_VARIANT=up5k
FPGA_PACKAGE=sg48
VERILOGS=$1

# Fail early with a clear message instead of letting yosys run without input.
if [ -z "$VERILOGS" ]; then
    echo "usage: $0 \"<verilog sources>\"" >&2
    exit 1
fi

# $VERILOGS stays unquoted so that a space-separated list given as one
# argument is split into individual files.
yosys -q -DICE_BREAKER -DNEGATIVE_RESET -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ice40 -abc9 -device u -dsp -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit
nextpnr-ice40 --force --json $PROJECTNAME.json --pcf BOARDS/$BOARD.pcf --asc $PROJECTNAME.asc --freq $BOARD_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE --pcf-allow-unconstrained || exit
# Timing report only: a failure here does not stop the build. The device is
# taken from FPGA_VARIANT so it cannot drift from the nextpnr setting.
icetime -p BOARDS/$BOARD.pcf -P $FPGA_PACKAGE -r $PROJECTNAME.timings -d $FPGA_VARIANT -t $PROJECTNAME.asc
icepack $PROJECTNAME.asc $PROJECTNAME.bin || exit
iceprog $PROJECTNAME.bin || exit
echo DONE.
|
||||
|
14
BOARDS/run_icestick.sh
Executable file
14
BOARDS/run_icestick.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
# Build the SOC design for the icestick target and program it:
#   yosys (synth_ice40) -> nextpnr-ice40 -> icetime -> icepack -> iceprog
#
# Usage: BOARDS/run_icestick.sh "<verilog sources>"
# The constraint file is referenced as BOARDS/$BOARD.pcf, so run this from
# the directory that contains BOARDS/.
PROJECTNAME=SOC
BOARD=icestick
BOARD_FREQ=12
CPU_FREQ=45
FPGA_VARIANT=hx1k
FPGA_PACKAGE=tq144
VERILOGS=$1

# Fail early with a clear message instead of letting yosys run without input.
if [ -z "$VERILOGS" ]; then
    echo "usage: $0 \"<verilog sources>\"" >&2
    exit 1
fi

# $VERILOGS stays unquoted so that a space-separated list given as one
# argument is split into individual files.
yosys -q -DICE_STICK -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ice40 -relut -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit
nextpnr-ice40 --force --timing-allow-fail --json $PROJECTNAME.json --pcf BOARDS/$BOARD.pcf --asc $PROJECTNAME.asc --freq $CPU_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE --pcf-allow-unconstrained --opt-timing || exit
# Timing report only: a failure here does not stop the build. The device is
# taken from FPGA_VARIANT so it cannot drift from the nextpnr setting.
icetime -p BOARDS/$BOARD.pcf -P $FPGA_PACKAGE -r $PROJECTNAME.timings -d $FPGA_VARIANT -t $PROJECTNAME.asc
icepack $PROJECTNAME.asc $PROJECTNAME.bin || exit
iceprog $PROJECTNAME.bin || exit
echo DONE.
|
||||
|
9
BOARDS/run_icestick_show.sh
Executable file
9
BOARDS/run_icestick_show.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env bash
# Synthesize the SOC design for the icestick target and open the result in
# the nextpnr-ice40 GUI (--gui). No bitstream is packed or programmed here.
#
# Usage: BOARDS/run_icestick_show.sh "<verilog sources>"
# The constraint file is referenced as BOARDS/$BOARD.pcf, so run this from
# the directory that contains BOARDS/.
PROJECTNAME=SOC
BOARD=icestick
BOARD_FREQ=12
CPU_FREQ=45
FPGA_VARIANT=hx1k
FPGA_PACKAGE=tq144
VERILOGS=$1

# Fail early with a clear message instead of letting yosys run without input.
if [ -z "$VERILOGS" ]; then
    echo "usage: $0 \"<verilog sources>\"" >&2
    exit 1
fi

# $VERILOGS stays unquoted so that a space-separated list given as one
# argument is split into individual files.
yosys -q -DICE_STICK -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ice40 -relut -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit
nextpnr-ice40 --gui --force --timing-allow-fail --json $PROJECTNAME.json --pcf BOARDS/$BOARD.pcf --asc $PROJECTNAME.asc --freq $CPU_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE --pcf-allow-unconstrained --opt-timing || exit
|
13
BOARDS/run_ulx3s.sh
Executable file
13
BOARDS/run_ulx3s.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash
# Build the SOC design for the ulx3s target and program it:
#   yosys (synth_ecp5) -> nextpnr-ecp5 -> ecppack -> ujprog
#
# Usage: BOARDS/run_ulx3s.sh "<verilog sources>"
# The constraint file is referenced as BOARDS/$BOARD.lpf, so run this from
# the directory that contains BOARDS/.
PROJECTNAME=SOC
BOARD=ulx3s
BOARD_FREQ=25
CPU_FREQ=100
FPGA_VARIANT=85k
FPGA_PACKAGE=CABGA381
VERILOGS=$1

# Fail early with a clear message instead of letting yosys run without input.
if [ -z "$VERILOGS" ]; then
    echo "usage: $0 \"<verilog sources>\"" >&2
    exit 1
fi

# Each step aborts the script with the failing tool's exit status.
# $VERILOGS stays unquoted so that a space-separated list given as one
# argument is split into individual files.
yosys -q -DULX3S -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ecp5 -abc9 -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit
nextpnr-ecp5 --force --timing-allow-fail --json $PROJECTNAME.json --lpf BOARDS/$BOARD.lpf --textcfg $PROJECTNAME"_out".config --freq $BOARD_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE || exit
ecppack --compress --svf-rowsize 100000 --svf $PROJECTNAME".svf" $PROJECTNAME"_out.config" $PROJECTNAME".bit" || exit
ujprog -j FLASH $PROJECTNAME".bit" || exit
|
||||
|
143
BOARDS/tangprimer20k.cst
Normal file
143
BOARDS/tangprimer20k.cst
Normal file
@@ -0,0 +1,143 @@
|
||||
// Pin constraints used by BOARDS/run_gowin.sh (passed as cst=BOARDS/tangprimer20k.cst).
// The file lists many port names; several share the same pin, so a design is
// expected to use only a non-conflicting subset of them.
// NOTE(review): the clock, reset and LED entries use the names "clk", "rst_i"
// and "led[n]", while the other board files in BOARDS/ constrain ports named
// CLK, RESET and LEDS[n]. Confirm these names against the SOC top-level ports.

// clock, key and reset
IO_LOC "clk" H11;
IO_PORT "clk" IO_TYPE=LVCMOS33;
IO_LOC "key_i" T3;
IO_LOC "rst_i" T10;
IO_PORT "rst_i" IO_TYPE=LVCMOS33;

IO_LOC "clk_i" IOT27A;

// led[0..5]
IO_LOC "led[0]" C13;
IO_PORT "led[0]" IO_TYPE=LVCMOS33;
IO_LOC "led[1]" A13;
IO_PORT "led[1]" IO_TYPE=LVCMOS33;
IO_LOC "led[2]" N16;
IO_PORT "led[2]" IO_TYPE=LVCMOS33;
IO_LOC "led[3]" N14;
IO_PORT "led[3]" IO_TYPE=LVCMOS33;
IO_LOC "led[4]" L14;
IO_PORT "led[4]" IO_TYPE=LVCMOS33;
IO_LOC "led[5]" L16;
IO_PORT "led[5]" IO_TYPE=LVCMOS33;

// UART (both lines pulled up)
IO_LOC "TXD" A15;
IO_PORT "TXD" IO_TYPE=LVCMOS33 PULL_MODE=UP;
IO_LOC "RXD" D14;
IO_PORT "RXD" IO_TYPE=LVCMOS33 PULL_MODE=UP;

// fake (led[6] and led[7] reuse the TXD and RXD pins)
IO_LOC "led[6]" A15;
IO_PORT "led[6]" IO_TYPE=LVCMOS33 PULL_MODE=NONE;
IO_LOC "led[7]" D14;
IO_PORT "led[7]" IO_TYPE=LVCMOS33 PULL_MODE=NONE;

// LVDS25 pairs
IO_LOC "tlvds_p" P6;
IO_PORT "tlvds_p" IO_TYPE=LVDS25 PULL_MODE=NONE;
IO_LOC "tlvds_n" T6;
IO_PORT "tlvds_n" IO_TYPE=LVDS25 PULL_MODE=NONE;

IO_LOC "elvds_p" C12;
IO_PORT "elvds_p" IO_TYPE=LVDS25 PULL_MODE=NONE;
IO_LOC "elvds_n" B12;
IO_PORT "elvds_n" IO_TYPE=LVDS25 PULL_MODE=NONE;

// LED_R / LED_G / LED_B (same pins as led[0..2])
IO_LOC "LED_R" C13;
IO_PORT "LED_R" IO_TYPE=LVCMOS33;
IO_LOC "LED_G" A13;
IO_PORT "LED_G" IO_TYPE=LVCMOS33;
IO_LOC "LED_B" N16;
IO_PORT "LED_B" IO_TYPE=LVCMOS33;

// oser
IO_LOC "oser_out" C13;
IO_PORT "oser_out" IO_TYPE=LVCMOS33;
IO_LOC "fclk_o" N16;
IO_PORT "fclk_o" IO_TYPE=LVCMOS33;
IO_LOC "pclk_o" N14;
IO_PORT "pclk_o" IO_TYPE=LVCMOS33;

// ides
IO_LOC "fclk_i" B13;
IO_PORT "fclk_i" IO_TYPE=LVCMOS33;
IO_LOC "data_i" C12;
IO_PORT "data_i" IO_TYPE=LVCMOS33;
IO_LOC "q_o[0]" P9;
IO_PORT "q_o[0]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[1]" E15;
IO_PORT "q_o[1]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[2]" T7;
IO_PORT "q_o[2]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[3]" R8;
IO_PORT "q_o[3]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[4]" T6;
IO_PORT "q_o[4]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[5]" P6;
IO_PORT "q_o[5]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[6]" T8;
IO_PORT "q_o[6]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[7]" P8;
IO_PORT "q_o[7]" IO_TYPE=LVCMOS33;

// RGB LCD: control signals
IO_LOC "LCD_CLK" R9;
IO_PORT "LCD_CLK" IO_TYPE=LVCMOS33;
IO_LOC "LCD_HYNC" A15;
IO_PORT "LCD_HYNC" IO_TYPE=LVCMOS33;
IO_LOC "LCD_SYNC" D14;
IO_PORT "LCD_SYNC" IO_TYPE=LVCMOS33;
IO_LOC "LCD_DEN" E15;
IO_PORT "LCD_DEN" IO_TYPE=LVCMOS33;

// RGB LCD: red channel
IO_LOC "LCD_R[0]" L9;
IO_PORT "LCD_R[0]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[1]" N8;
IO_PORT "LCD_R[1]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[2]" N9;
IO_PORT "LCD_R[2]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[3]" N7;
IO_PORT "LCD_R[3]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[4]" N6;
IO_PORT "LCD_R[4]" IO_TYPE=LVCMOS33;

// RGB LCD: green channel
IO_LOC "LCD_G[0]" D11;
IO_PORT "LCD_G[0]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[1]" A11;
IO_PORT "LCD_G[1]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[2]" B11;
IO_PORT "LCD_G[2]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[3]" P7;
IO_PORT "LCD_G[3]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[4]" R7;
IO_PORT "LCD_G[4]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[5]" D10;
IO_PORT "LCD_G[5]" IO_TYPE=LVCMOS33;

// RGB LCD: blue channel
IO_LOC "LCD_B[0]" B12;
IO_PORT "LCD_B[0]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[1]" C12;
IO_PORT "LCD_B[1]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[2]" B13;
IO_PORT "LCD_B[2]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[3]" A14;
IO_PORT "LCD_B[3]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[4]" B14;
IO_PORT "LCD_B[4]" IO_TYPE=LVCMOS33;

// DVI
IO_LOC "tmds_clk_p" G16;
IO_PORT "tmds_clk_p" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_clk_n" H15;
IO_PORT "tmds_clk_n" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_p[0]" H14;
IO_PORT "tmds_d_p[0]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_n[0]" H16;
IO_PORT "tmds_d_n[0]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_p[1]" J15;
IO_PORT "tmds_d_p[1]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_n[1]" K16;
IO_PORT "tmds_d_n[1]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_p[2]" K14;
IO_PORT "tmds_d_p[2]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_n[2]" K15;
IO_PORT "tmds_d_n[2]" PULL_MODE=NONE DRIVE=3.5;

// div_led (same pin as led[0])
IO_LOC "div_led" C13;
IO_PORT "div_led" IO_TYPE=LVCMOS33;
|
35
BOARDS/ulx3s.lpf
Normal file
35
BOARDS/ulx3s.lpf
Normal file
@@ -0,0 +1,35 @@
|
||||
# Constraints used by BOARDS/run_ulx3s.sh (passed via --lpf).
# Reference: https://github.com/emard/ulx3s/blob/master/doc/constraints/ulx3s_v20.lpf
# Each port's site and buffer settings are kept next to each other.

## Clock (25 MHz) ################################

LOCATE COMP "CLK" SITE "G2";
IOBUF PORT "CLK" PULLMODE=NONE IO_TYPE=LVCMOS33;
FREQUENCY PORT "CLK" 25 MHZ;

## RESET button ##################################

LOCATE COMP "RESET" SITE "T1"; # fire 2
IOBUF PORT "RESET" IO_TYPE=LVCMOS33;

## LEDs ##########################################

LOCATE COMP "LEDS[0]" SITE "B2";
IOBUF PORT "LEDS[0]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[1]" SITE "C2";
IOBUF PORT "LEDS[1]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[2]" SITE "C1";
IOBUF PORT "LEDS[2]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[3]" SITE "D2";
IOBUF PORT "LEDS[3]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[4]" SITE "D1";
IOBUF PORT "LEDS[4]" IO_TYPE=LVCMOS33;

## UART (both lines pulled up) ###################

LOCATE COMP "TXD" SITE "L4"; # FPGA transmits to ftdi
IOBUF PORT "TXD" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
LOCATE COMP "RXD" SITE "M1"; # FPGA receives from ftdi
IOBUF PORT "RXD" PULLMODE=UP IO_TYPE=LVCMOS33;
|
||||
|
595
FIRMWARE/COREMARK/core_list_join.c
Normal file
595
FIRMWARE/COREMARK/core_list_join.c
Normal file
@@ -0,0 +1,595 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/*
|
||||
Topic: Description
|
||||
Benchmark using a linked list.
|
||||
|
||||
Linked list is a common data structure used in many applications.
|
||||
|
||||
For our purposes, this will exercise the memory units of the processor.
|
||||
In particular, usage of the list pointers to find and alter data.
|
||||
|
||||
We are not using Malloc since some platforms do not support this
|
||||
library.
|
||||
|
||||
Instead, the memory block being passed in is used to create a list,
|
||||
and the benchmark takes care not to add more items than can be
|
||||
accommodated by the memory block. The porting layer will make sure
|
||||
that we have a valid memory block.
|
||||
|
||||
All operations are done in place, without using any extra memory.
|
||||
|
||||
The list itself contains list pointers and pointers to data items.
|
||||
Data items contain the following:
|
||||
|
||||
idx - An index that captures the initial order of the list.
|
||||
data - Variable data initialized based on the input parameters. The 16b
|
||||
are divided as follows: o Upper 8b are backup of original data. o Bit 7
|
||||
indicates if the lower 7 bits are to be used as is or calculated. o Bits 0-2
|
||||
indicate type of operation to perform to get a 7b value. o Bits 3-6 provide
|
||||
input for the operation.
|
||||
|
||||
*/
|
||||
|
||||
/* local functions */
|
||||
|
||||
list_head *core_list_find(list_head *list, list_data *info);
|
||||
list_head *core_list_reverse(list_head *list);
|
||||
list_head *core_list_remove(list_head *item);
|
||||
list_head *core_list_undo_remove(list_head *item_removed,
|
||||
list_head *item_modified);
|
||||
list_head *core_list_insert_new(list_head * insert_point,
|
||||
list_data * info,
|
||||
list_head **memblock,
|
||||
list_data **datablock,
|
||||
list_head * memblock_end,
|
||||
list_data * datablock_end);
|
||||
typedef ee_s32 (*list_cmp)(list_data *a, list_data *b, core_results *res);
|
||||
list_head *core_list_mergesort(list_head * list,
|
||||
list_cmp cmp,
|
||||
core_results *res);
|
||||
|
||||
ee_s16
|
||||
calc_func(ee_s16 *pdata, core_results *res)
|
||||
{
|
||||
ee_s16 data = *pdata;
|
||||
ee_s16 retval;
|
||||
ee_u8 optype
|
||||
= (data >> 7)
|
||||
& 1; /* bit 7 indicates if the function result has been cached */
|
||||
if (optype) /* if cached, use cache */
|
||||
return (data & 0x007f);
|
||||
else
|
||||
{ /* otherwise calculate and cache the result */
|
||||
ee_s16 flag = data & 0x7; /* bits 0-2 is type of function to perform */
|
||||
ee_s16 dtype
|
||||
= ((data >> 3)
|
||||
& 0xf); /* bits 3-6 is specific data for the operation */
|
||||
dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
|
||||
switch (flag)
|
||||
{
|
||||
case 0:
|
||||
if (dtype < 0x22) /* set min period for bit corruption */
|
||||
dtype = 0x22;
|
||||
retval = core_bench_state(res->size,
|
||||
res->memblock[3],
|
||||
res->seed1,
|
||||
res->seed2,
|
||||
dtype,
|
||||
res->crc);
|
||||
if (res->crcstate == 0)
|
||||
res->crcstate = retval;
|
||||
break;
|
||||
case 1:
|
||||
retval = core_bench_matrix(&(res->mat), dtype, res->crc);
|
||||
if (res->crcmatrix == 0)
|
||||
res->crcmatrix = retval;
|
||||
break;
|
||||
default:
|
||||
retval = data;
|
||||
break;
|
||||
}
|
||||
res->crc = crcu16(retval, res->crc);
|
||||
retval &= 0x007f;
|
||||
*pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
|
||||
return retval;
|
||||
}
|
||||
}
|
||||
/* Function: cmp_complex
|
||||
Compare the data item in a list cell.
|
||||
|
||||
Can be used by mergesort.
|
||||
*/
|
||||
ee_s32
|
||||
cmp_complex(list_data *a, list_data *b, core_results *res)
|
||||
{
|
||||
ee_s16 val1 = calc_func(&(a->data16), res);
|
||||
ee_s16 val2 = calc_func(&(b->data16), res);
|
||||
return val1 - val2;
|
||||
}
|
||||
|
||||
/* Function: cmp_idx
|
||||
Compare the idx item in a list cell, and regen the data.
|
||||
|
||||
Can be used by mergesort.
|
||||
*/
|
||||
ee_s32
|
||||
cmp_idx(list_data *a, list_data *b, core_results *res)
|
||||
{
|
||||
if (res == NULL)
|
||||
{
|
||||
a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16 >> 8));
|
||||
b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16 >> 8));
|
||||
}
|
||||
return a->idx - b->idx;
|
||||
}
|
||||
|
||||
void
|
||||
copy_info(list_data *to, list_data *from)
|
||||
{
|
||||
to->data16 = from->data16;
|
||||
to->idx = from->idx;
|
||||
}
|
||||
|
||||
/* Benchmark for linked list:
|
||||
- Try to find multiple data items.
|
||||
- List sort
|
||||
- Operate on data from list (crc)
|
||||
- Single remove/reinsert
|
||||
* At the end of this function, the list is back to original state
|
||||
*/
|
||||
ee_u16
|
||||
core_bench_list(core_results *res, ee_s16 finder_idx)
|
||||
{
|
||||
ee_u16 retval = 0;
|
||||
ee_u16 found = 0, missed = 0;
|
||||
list_head *list = res->list;
|
||||
ee_s16 find_num = res->seed3;
|
||||
list_head *this_find;
|
||||
list_head *finder, *remover;
|
||||
list_data info;
|
||||
ee_s16 i;
|
||||
|
||||
info.idx = finder_idx;
|
||||
/* find <find_num> values in the list, and change the list each time
|
||||
* (reverse and cache if value found) */
|
||||
for (i = 0; i < find_num; i++)
|
||||
{
|
||||
info.data16 = (i & 0xff);
|
||||
this_find = core_list_find(list, &info);
|
||||
list = core_list_reverse(list);
|
||||
if (this_find == NULL)
|
||||
{
|
||||
missed++;
|
||||
retval += (list->next->info->data16 >> 8) & 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
found++;
|
||||
if (this_find->info->data16 & 0x1) /* use found value */
|
||||
retval += (this_find->info->data16 >> 9) & 1;
|
||||
/* and cache next item at the head of the list (if any) */
|
||||
if (this_find->next != NULL)
|
||||
{
|
||||
finder = this_find->next;
|
||||
this_find->next = finder->next;
|
||||
finder->next = list->next;
|
||||
list->next = finder;
|
||||
}
|
||||
}
|
||||
if (info.idx >= 0)
|
||||
info.idx++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List find %d: [%d,%d,%d]\n", i, retval, missed, found);
|
||||
#endif
|
||||
}
|
||||
retval += found * 4 - missed;
|
||||
/* sort the list by data content and remove one item*/
|
||||
if (finder_idx > 0)
|
||||
list = core_list_mergesort(list, cmp_complex, res);
|
||||
remover = core_list_remove(list->next);
|
||||
/* CRC data content of list from location of index N forward, and then undo
|
||||
* remove */
|
||||
finder = core_list_find(list, &info);
|
||||
if (!finder)
|
||||
finder = list->next;
|
||||
while (finder)
|
||||
{
|
||||
retval = crc16(list->info->data16, retval);
|
||||
finder = finder->next;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List sort 1: %04x\n", retval);
|
||||
#endif
|
||||
remover = core_list_undo_remove(remover, list->next);
|
||||
/* sort the list by index, in effect returning the list to original state */
|
||||
list = core_list_mergesort(list, cmp_idx, NULL);
|
||||
/* CRC data content of list */
|
||||
finder = list->next;
|
||||
while (finder)
|
||||
{
|
||||
retval = crc16(list->info->data16, retval);
|
||||
finder = finder->next;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List sort 2: %04x\n", retval);
|
||||
#endif
|
||||
return retval;
|
||||
}
|
||||
/* Function: core_list_init
|
||||
Initialize list with data.
|
||||
|
||||
Parameters:
|
||||
blksize - Size of memory to be initialized.
|
||||
memblock - Pointer to memory block.
|
||||
seed - Actual values chosen depend on the seed parameter.
|
||||
The seed parameter MUST be supplied from a source that cannot be
|
||||
determined at compile time
|
||||
|
||||
Returns:
|
||||
Pointer to the head of the list.
|
||||
|
||||
*/
|
||||
list_head *
|
||||
core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed)
|
||||
{
|
||||
/* calculated pointers for the list */
|
||||
ee_u32 per_item = 16 + sizeof(struct list_data_s);
|
||||
ee_u32 size = (blksize / per_item)
|
||||
- 2; /* to accommodate systems with 64b pointers, and make sure
|
||||
same code is executed, set max list elements */
|
||||
list_head *memblock_end = memblock + size;
|
||||
list_data *datablock = (list_data *)(memblock_end);
|
||||
list_data *datablock_end = datablock + size;
|
||||
/* some useful variables */
|
||||
ee_u32 i;
|
||||
list_head *finder, *list = memblock;
|
||||
list_data info;
|
||||
|
||||
/* create a fake items for the list head and tail */
|
||||
list->next = NULL;
|
||||
list->info = datablock;
|
||||
list->info->idx = 0x0000;
|
||||
list->info->data16 = (ee_s16)0x8080;
|
||||
memblock++;
|
||||
datablock++;
|
||||
info.idx = 0x7fff;
|
||||
info.data16 = (ee_s16)0xffff;
|
||||
core_list_insert_new(
|
||||
list, &info, &memblock, &datablock, memblock_end, datablock_end);
|
||||
|
||||
/* then insert size items */
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
ee_u16 datpat = ((ee_u16)(seed ^ i) & 0xf);
|
||||
ee_u16 dat
|
||||
= (datpat << 3) | (i & 0x7); /* alternate between algorithms */
|
||||
info.data16 = (dat << 8) | dat; /* fill the data with actual data and
|
||||
upper bits with rebuild value */
|
||||
core_list_insert_new(
|
||||
list, &info, &memblock, &datablock, memblock_end, datablock_end);
|
||||
}
|
||||
/* and now index the list so we know initial seed order of the list */
|
||||
finder = list->next;
|
||||
i = 1;
|
||||
while (finder->next != NULL)
|
||||
{
|
||||
if (i < size / 5) /* first 20% of the list in order */
|
||||
finder->info->idx = i++;
|
||||
else
|
||||
{
|
||||
ee_u16 pat = (ee_u16)(i++ ^ seed); /* get a pseudo random number */
|
||||
finder->info->idx = 0x3fff
|
||||
& (((i & 0x07) << 8)
|
||||
| pat); /* make sure the mixed items end up
|
||||
after the ones in sequence */
|
||||
}
|
||||
finder = finder->next;
|
||||
}
|
||||
list = core_list_mergesort(list, cmp_idx, NULL);
|
||||
#if CORE_DEBUG
|
||||
ee_printf("Initialized list:\n");
|
||||
finder = list;
|
||||
while (finder)
|
||||
{
|
||||
ee_printf(
|
||||
"[%04x,%04x]", finder->info->idx, (ee_u16)finder->info->data16);
|
||||
finder = finder->next;
|
||||
}
|
||||
ee_printf("\n");
|
||||
#endif
|
||||
return list;
|
||||
}
|
||||
|
||||
/* Function: core_list_insert
|
||||
Insert an item to the list
|
||||
|
||||
Parameters:
|
||||
insert_point - where to insert the item.
|
||||
info - data for the cell.
|
||||
memblock - pointer for the list header
|
||||
datablock - pointer for the list data
|
||||
memblock_end - end of region for list headers
|
||||
datablock_end - end of region for list data
|
||||
|
||||
Returns:
|
||||
Pointer to new item.
|
||||
*/
|
||||
list_head *
|
||||
core_list_insert_new(list_head * insert_point,
|
||||
list_data * info,
|
||||
list_head **memblock,
|
||||
list_data **datablock,
|
||||
list_head * memblock_end,
|
||||
list_data * datablock_end)
|
||||
{
|
||||
list_head *newitem;
|
||||
|
||||
if ((*memblock + 1) >= memblock_end)
|
||||
return NULL;
|
||||
if ((*datablock + 1) >= datablock_end)
|
||||
return NULL;
|
||||
|
||||
newitem = *memblock;
|
||||
(*memblock)++;
|
||||
newitem->next = insert_point->next;
|
||||
insert_point->next = newitem;
|
||||
|
||||
newitem->info = *datablock;
|
||||
(*datablock)++;
|
||||
copy_info(newitem->info, info);
|
||||
|
||||
return newitem;
|
||||
}
|
||||
|
||||
/* Function: core_list_remove
|
||||
Remove an item from the list.
|
||||
|
||||
Operation:
|
||||
For a singly linked list, remove by copying the data from the next item
|
||||
over to the current cell, and unlinking the next item.
|
||||
|
||||
Note:
|
||||
since there is always a fake item at the end of the list, no need to
|
||||
check for NULL.
|
||||
|
||||
Returns:
|
||||
Removed item.
|
||||
*/
|
||||
list_head *
|
||||
core_list_remove(list_head *item)
|
||||
{
|
||||
list_data *tmp;
|
||||
list_head *ret = item->next;
|
||||
/* swap data pointers */
|
||||
tmp = item->info;
|
||||
item->info = ret->info;
|
||||
ret->info = tmp;
|
||||
/* and eliminate item */
|
||||
item->next = item->next->next;
|
||||
ret->next = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Function: core_list_undo_remove
|
||||
Undo a remove operation.
|
||||
|
||||
Operation:
|
||||
Since we want each iteration of the benchmark to be exactly the same,
|
||||
we need to be able to undo a remove.
|
||||
Link the removed item back into the list, and switch the info items.
|
||||
|
||||
Parameters:
|
||||
item_removed - Return value from the <core_list_remove>
|
||||
item_modified - List item that was modified during <core_list_remove>
|
||||
|
||||
Returns:
|
||||
The item that was linked back to the list.
|
||||
|
||||
*/
|
||||
list_head *
|
||||
core_list_undo_remove(list_head *item_removed, list_head *item_modified)
|
||||
{
|
||||
list_data *tmp;
|
||||
/* swap data pointers */
|
||||
tmp = item_removed->info;
|
||||
item_removed->info = item_modified->info;
|
||||
item_modified->info = tmp;
|
||||
/* and insert item */
|
||||
item_removed->next = item_modified->next;
|
||||
item_modified->next = item_removed;
|
||||
return item_removed;
|
||||
}
|
||||
|
||||
/* Function: core_list_find
|
||||
Find an item in the list
|
||||
|
||||
Operation:
|
||||
Find an item by idx (if not 0) or specific data value
|
||||
|
||||
Parameters:
|
||||
list - list head
|
||||
info - idx or data to find
|
||||
|
||||
Returns:
|
||||
Found item, or NULL if not found.
|
||||
*/
|
||||
list_head *
|
||||
core_list_find(list_head *list, list_data *info)
|
||||
{
|
||||
if (info->idx >= 0)
|
||||
{
|
||||
while (list && (list->info->idx != info->idx))
|
||||
list = list->next;
|
||||
return list;
|
||||
}
|
||||
else
|
||||
{
|
||||
while (list && ((list->info->data16 & 0xff) != info->data16))
|
||||
list = list->next;
|
||||
return list;
|
||||
}
|
||||
}
|
||||
/* Function: core_list_reverse
|
||||
Reverse a list
|
||||
|
||||
Operation:
|
||||
Rearrange the pointers so the list is reversed.
|
||||
|
||||
Parameters:
|
||||
list - list head
|
||||
info - idx or data to find
|
||||
|
||||
Returns:
|
||||
Found item, or NULL if not found.
|
||||
*/
|
||||
|
||||
list_head *
|
||||
core_list_reverse(list_head *list)
|
||||
{
|
||||
list_head *next = NULL, *tmp;
|
||||
while (list)
|
||||
{
|
||||
tmp = list->next;
|
||||
list->next = next;
|
||||
next = list;
|
||||
list = tmp;
|
||||
}
|
||||
return next;
|
||||
}
|
||||
/* Function: core_list_mergesort
|
||||
Sort the list in place without recursion.
|
||||
|
||||
Description:
|
||||
Use mergesort, as for linked list this is a realistic solution.
|
||||
Also, since this is aimed at embedded, care was taken to use iterative
|
||||
rather then recursive algorithm. The sort can either return the list to
|
||||
original order (by idx) , or use the data item to invoke other other
|
||||
algorithms and change the order of the list.
|
||||
|
||||
Parameters:
|
||||
list - list to be sorted.
|
||||
cmp - cmp function to use
|
||||
|
||||
Returns:
|
||||
New head of the list.
|
||||
|
||||
Note:
|
||||
We have a special header for the list that will always be first,
|
||||
but the algorithm could theoretically modify where the list starts.
|
||||
|
||||
*/
|
||||
list_head *
|
||||
core_list_mergesort(list_head *list, list_cmp cmp, core_results *res)
|
||||
{
|
||||
list_head *p, *q, *e, *tail;
|
||||
ee_s32 insize, nmerges, psize, qsize, i;
|
||||
|
||||
insize = 1;
|
||||
|
||||
while (1)
|
||||
{
|
||||
p = list;
|
||||
list = NULL;
|
||||
tail = NULL;
|
||||
|
||||
nmerges = 0; /* count number of merges we do in this pass */
|
||||
|
||||
while (p)
|
||||
{
|
||||
nmerges++; /* there exists a merge to be done */
|
||||
/* step `insize' places along from p */
|
||||
q = p;
|
||||
psize = 0;
|
||||
for (i = 0; i < insize; i++)
|
||||
{
|
||||
psize++;
|
||||
q = q->next;
|
||||
if (!q)
|
||||
break;
|
||||
}
|
||||
|
||||
/* if q hasn't fallen off end, we have two lists to merge */
|
||||
qsize = insize;
|
||||
|
||||
/* now we have two lists; merge them */
|
||||
while (psize > 0 || (qsize > 0 && q))
|
||||
{
|
||||
|
||||
/* decide whether next element of merge comes from p or q */
|
||||
if (psize == 0)
|
||||
{
|
||||
/* p is empty; e must come from q. */
|
||||
e = q;
|
||||
q = q->next;
|
||||
qsize--;
|
||||
}
|
||||
else if (qsize == 0 || !q)
|
||||
{
|
||||
/* q is empty; e must come from p. */
|
||||
e = p;
|
||||
p = p->next;
|
||||
psize--;
|
||||
}
|
||||
else if (cmp(p->info, q->info, res) <= 0)
|
||||
{
|
||||
/* First element of p is lower (or same); e must come from
|
||||
* p. */
|
||||
e = p;
|
||||
p = p->next;
|
||||
psize--;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First element of q is lower; e must come from q. */
|
||||
e = q;
|
||||
q = q->next;
|
||||
qsize--;
|
||||
}
|
||||
|
||||
/* add the next element to the merged list */
|
||||
if (tail)
|
||||
{
|
||||
tail->next = e;
|
||||
}
|
||||
else
|
||||
{
|
||||
list = e;
|
||||
}
|
||||
tail = e;
|
||||
}
|
||||
|
||||
/* now p has stepped `insize' places along, and q has too */
|
||||
p = q;
|
||||
}
|
||||
|
||||
tail->next = NULL;
|
||||
|
||||
/* If we have done only one merge, we're finished. */
|
||||
if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
|
||||
return list;
|
||||
|
||||
/* Otherwise repeat, merging lists twice the size */
|
||||
insize *= 2;
|
||||
}
|
||||
#if COMPILER_REQUIRES_SORT_RETURN
|
||||
return list;
|
||||
#endif
|
||||
}
|
451
FIRMWARE/COREMARK/core_main.c
Normal file
451
FIRMWARE/COREMARK/core_main.c
Normal file
@@ -0,0 +1,451 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
/* File: core_main.c
|
||||
This file contains the framework to acquire a block of memory, seed
|
||||
initial parameters, run the benchmark and report the results.
|
||||
*/
|
||||
#include "coremark.h"
|
||||
|
||||
/* Function: iterate
|
||||
Run the benchmark for a specified number of iterations.
|
||||
|
||||
Operation:
|
||||
For each type of benchmarked algorithm:
|
||||
a - Initialize the data block for the algorithm.
|
||||
b - Execute the algorithm N times.
|
||||
|
||||
Returns:
|
||||
NULL.
|
||||
*/
|
||||
static ee_u16 list_known_crc[] = { (ee_u16)0xd4b0,
|
||||
(ee_u16)0x3340,
|
||||
(ee_u16)0x6a79,
|
||||
(ee_u16)0xe714,
|
||||
(ee_u16)0xe3c1 };
|
||||
static ee_u16 matrix_known_crc[] = { (ee_u16)0xbe52,
|
||||
(ee_u16)0x1199,
|
||||
(ee_u16)0x5608,
|
||||
(ee_u16)0x1fd7,
|
||||
(ee_u16)0x0747 };
|
||||
static ee_u16 state_known_crc[] = { (ee_u16)0x5e47,
|
||||
(ee_u16)0x39bf,
|
||||
(ee_u16)0xe5a4,
|
||||
(ee_u16)0x8e3a,
|
||||
(ee_u16)0x8d84 };
|
||||
void *
|
||||
iterate(void *pres)
|
||||
{
|
||||
ee_u32 i;
|
||||
ee_u16 crc;
|
||||
core_results *res = (core_results *)pres;
|
||||
ee_u32 iterations = res->iterations;
|
||||
res->crc = 0;
|
||||
res->crclist = 0;
|
||||
res->crcmatrix = 0;
|
||||
res->crcstate = 0;
|
||||
|
||||
for (i = 0; i < iterations; i++)
|
||||
{
|
||||
crc = core_bench_list(res, 1);
|
||||
res->crc = crcu16(crc, res->crc);
|
||||
crc = core_bench_list(res, -1);
|
||||
res->crc = crcu16(crc, res->crc);
|
||||
if (i == 0)
|
||||
res->crclist = res->crc;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if (SEED_METHOD == SEED_ARG)
|
||||
ee_s32 get_seed_args(int i, int argc, char *argv[]);
|
||||
#define get_seed(x) (ee_s16) get_seed_args(x, argc, argv)
|
||||
#define get_seed_32(x) get_seed_args(x, argc, argv)
|
||||
#else /* via function or volatile */
|
||||
ee_s32 get_seed_32(int i);
|
||||
#define get_seed(x) (ee_s16) get_seed_32(x)
|
||||
#endif
|
||||
|
||||
#if (MEM_METHOD == MEM_STATIC)
|
||||
ee_u8 static_memblk[TOTAL_DATA_SIZE];
|
||||
#endif
|
||||
char *mem_name[3] = { "Static", "Heap", "Stack" };
|
||||
/* Function: main
|
||||
Main entry routine for the benchmark.
|
||||
This function is responsible for the following steps:
|
||||
|
||||
1 - Initialize input seeds from a source that cannot be determined at
|
||||
compile time. 2 - Initialize memory block for use. 3 - Run and time the
|
||||
benchmark. 4 - Report results, testing the validity of the output if the
|
||||
seeds are known.
|
||||
|
||||
Arguments:
|
||||
1 - first seed : Any value
|
||||
2 - second seed : Must be identical to first for iterations to be
|
||||
identical 3 - third seed : Any value, should be at least an order of
|
||||
magnitude less than the input size, but bigger than 32. 4 - Iterations :
|
||||
Special, if set to 0, iterations will be automatically determined such that
|
||||
the benchmark will run between 10 to 100 secs
|
||||
|
||||
*/
|
||||
|
||||
#if MAIN_HAS_NOARGC
|
||||
MAIN_RETURN_TYPE
|
||||
main(void)
|
||||
{
|
||||
int argc = 0;
|
||||
char *argv[1];
|
||||
#else
|
||||
MAIN_RETURN_TYPE
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
#endif
|
||||
ee_u16 i, j = 0, num_algorithms = 0;
|
||||
ee_s16 known_id = -1, total_errors = 0;
|
||||
ee_u16 seedcrc = 0;
|
||||
CORE_TICKS total_time;
|
||||
core_results results[MULTITHREAD];
|
||||
#if (MEM_METHOD == MEM_STACK)
|
||||
ee_u8 stack_memblock[TOTAL_DATA_SIZE * MULTITHREAD];
|
||||
#endif
|
||||
/* first call any initializations needed */
|
||||
portable_init(&(results[0].port), &argc, argv);
|
||||
/* First some checks to make sure benchmark will run ok */
|
||||
if (sizeof(struct list_head_s) > 128)
|
||||
{
|
||||
ee_printf("list_head structure too big for comparable data!\n");
|
||||
return MAIN_RETURN_VAL;
|
||||
}
|
||||
results[0].seed1 = get_seed(1);
|
||||
results[0].seed2 = get_seed(2);
|
||||
results[0].seed3 = get_seed(3);
|
||||
results[0].iterations = get_seed_32(4);
|
||||
#if CORE_DEBUG
|
||||
results[0].iterations = 1;
|
||||
#endif
|
||||
results[0].execs = get_seed_32(5);
|
||||
if (results[0].execs == 0)
|
||||
{ /* if not supplied, execute all algorithms */
|
||||
results[0].execs = ALL_ALGORITHMS_MASK;
|
||||
}
|
||||
/* put in some default values based on one seed only for easy testing */
|
||||
if ((results[0].seed1 == 0) && (results[0].seed2 == 0)
|
||||
&& (results[0].seed3 == 0))
|
||||
{ /* performance run */
|
||||
results[0].seed1 = 0;
|
||||
results[0].seed2 = 0;
|
||||
results[0].seed3 = 0x66;
|
||||
}
|
||||
if ((results[0].seed1 == 1) && (results[0].seed2 == 0)
|
||||
&& (results[0].seed3 == 0))
|
||||
{ /* validation run */
|
||||
results[0].seed1 = 0x3415;
|
||||
results[0].seed2 = 0x3415;
|
||||
results[0].seed3 = 0x66;
|
||||
}
|
||||
#if (MEM_METHOD == MEM_STATIC)
|
||||
results[0].memblock[0] = (void *)static_memblk;
|
||||
results[0].size = TOTAL_DATA_SIZE;
|
||||
results[0].err = 0;
|
||||
#if (MULTITHREAD > 1)
|
||||
#error "Cannot use a static data area with multiple contexts!"
|
||||
#endif
|
||||
#elif (MEM_METHOD == MEM_MALLOC)
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
{
|
||||
ee_s32 malloc_override = get_seed(7);
|
||||
if (malloc_override != 0)
|
||||
results[i].size = malloc_override;
|
||||
else
|
||||
results[i].size = TOTAL_DATA_SIZE;
|
||||
results[i].memblock[0] = portable_malloc(results[i].size);
|
||||
results[i].seed1 = results[0].seed1;
|
||||
results[i].seed2 = results[0].seed2;
|
||||
results[i].seed3 = results[0].seed3;
|
||||
results[i].err = 0;
|
||||
results[i].execs = results[0].execs;
|
||||
}
|
||||
#elif (MEM_METHOD == MEM_STACK)
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
{
|
||||
results[i].memblock[0] = stack_memblock + i * TOTAL_DATA_SIZE;
|
||||
results[i].size = TOTAL_DATA_SIZE;
|
||||
results[i].seed1 = results[0].seed1;
|
||||
results[i].seed2 = results[0].seed2;
|
||||
results[i].seed3 = results[0].seed3;
|
||||
results[i].err = 0;
|
||||
results[i].execs = results[0].execs;
|
||||
}
|
||||
#else
|
||||
#error "Please define a way to initialize a memory block."
|
||||
#endif
|
||||
/* Data init */
|
||||
/* Find out how space much we have based on number of algorithms */
|
||||
for (i = 0; i < NUM_ALGORITHMS; i++)
|
||||
{
|
||||
if ((1 << (ee_u32)i) & results[0].execs)
|
||||
num_algorithms++;
|
||||
}
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
results[i].size = results[i].size / num_algorithms;
|
||||
/* Assign pointers */
|
||||
for (i = 0; i < NUM_ALGORITHMS; i++)
|
||||
{
|
||||
ee_u32 ctx;
|
||||
if ((1 << (ee_u32)i) & results[0].execs)
|
||||
{
|
||||
for (ctx = 0; ctx < MULTITHREAD; ctx++)
|
||||
results[ctx].memblock[i + 1]
|
||||
= (char *)(results[ctx].memblock[0]) + results[0].size * j;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
/* call inits */
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
{
|
||||
if (results[i].execs & ID_LIST)
|
||||
{
|
||||
results[i].list = core_list_init(
|
||||
results[0].size, results[i].memblock[1], results[i].seed1);
|
||||
}
|
||||
if (results[i].execs & ID_MATRIX)
|
||||
{
|
||||
core_init_matrix(results[0].size,
|
||||
results[i].memblock[2],
|
||||
(ee_s32)results[i].seed1
|
||||
| (((ee_s32)results[i].seed2) << 16),
|
||||
&(results[i].mat));
|
||||
}
|
||||
if (results[i].execs & ID_STATE)
|
||||
{
|
||||
core_init_state(
|
||||
results[0].size, results[i].seed1, results[i].memblock[3]);
|
||||
}
|
||||
}
|
||||
|
||||
/* automatically determine number of iterations if not set */
|
||||
if (results[0].iterations == 0)
|
||||
{
|
||||
secs_ret secs_passed = 0;
|
||||
ee_u32 divisor;
|
||||
results[0].iterations = 1;
|
||||
while (secs_passed < (secs_ret)1)
|
||||
{
|
||||
results[0].iterations *= 10;
|
||||
start_time();
|
||||
iterate(&results[0]);
|
||||
stop_time();
|
||||
secs_passed = time_in_secs(get_time());
|
||||
}
|
||||
/* now we know it executes for at least 1 sec, set actual run time at
|
||||
* about 10 secs */
|
||||
divisor = (ee_u32)secs_passed;
|
||||
if (divisor == 0) /* some machines cast float to int as 0 since this
|
||||
conversion is not defined by ANSI, but we know at
|
||||
least one second passed */
|
||||
divisor = 1;
|
||||
results[0].iterations *= 1 + 10 / divisor;
|
||||
}
|
||||
/* perform actual benchmark */
|
||||
start_time();
|
||||
#if (MULTITHREAD > 1)
|
||||
if (default_num_contexts > MULTITHREAD)
|
||||
{
|
||||
default_num_contexts = MULTITHREAD;
|
||||
}
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
{
|
||||
results[i].iterations = results[0].iterations;
|
||||
results[i].execs = results[0].execs;
|
||||
core_start_parallel(&results[i]);
|
||||
}
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
{
|
||||
core_stop_parallel(&results[i]);
|
||||
}
|
||||
#else
|
||||
iterate(&results[0]);
|
||||
#endif
|
||||
stop_time();
|
||||
total_time = get_time();
|
||||
/* get a function of the input to report */
|
||||
seedcrc = crc16(results[0].seed1, seedcrc);
|
||||
seedcrc = crc16(results[0].seed2, seedcrc);
|
||||
seedcrc = crc16(results[0].seed3, seedcrc);
|
||||
seedcrc = crc16(results[0].size, seedcrc);
|
||||
|
||||
switch (seedcrc)
|
||||
{ /* test known output for common seeds */
|
||||
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
|
||||
known_id = 0;
|
||||
ee_printf("6k performance run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per
|
||||
algorithm */
|
||||
known_id = 1;
|
||||
ee_printf("6k validation run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm
|
||||
*/
|
||||
known_id = 2;
|
||||
ee_printf("Profile generation run parameters for coremark.\n");
|
||||
break;
|
||||
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
|
||||
known_id = 3;
|
||||
ee_printf("2K performance run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per
|
||||
algorithm */
|
||||
known_id = 4;
|
||||
ee_printf("2K validation run parameters for coremark.\n");
|
||||
break;
|
||||
default:
|
||||
total_errors = -1;
|
||||
break;
|
||||
}
|
||||
if (known_id >= 0)
|
||||
{
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
{
|
||||
results[i].err = 0;
|
||||
if ((results[i].execs & ID_LIST)
|
||||
&& (results[i].crclist != list_known_crc[known_id]))
|
||||
{
|
||||
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",
|
||||
i,
|
||||
results[i].crclist,
|
||||
list_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
if ((results[i].execs & ID_MATRIX)
|
||||
&& (results[i].crcmatrix != matrix_known_crc[known_id]))
|
||||
{
|
||||
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",
|
||||
i,
|
||||
results[i].crcmatrix,
|
||||
matrix_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
if ((results[i].execs & ID_STATE)
|
||||
&& (results[i].crcstate != state_known_crc[known_id]))
|
||||
{
|
||||
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",
|
||||
i,
|
||||
results[i].crcstate,
|
||||
state_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
total_errors += results[i].err;
|
||||
}
|
||||
}
|
||||
total_errors += check_data_types();
|
||||
/* and report results */
|
||||
ee_printf("CoreMark Size : %lu\n", (long unsigned)results[0].size);
|
||||
ee_printf("Total ticks : %lu\n", (long unsigned)total_time);
|
||||
#if HAS_FLOAT
|
||||
ee_printf("Total time (secs): %f\n", time_in_secs(total_time));
|
||||
if (time_in_secs(total_time) > 0)
|
||||
ee_printf("Iterations/Sec : %f\n",
|
||||
(default_num_contexts * results[0].iterations)
|
||||
/ time_in_secs(total_time));
|
||||
#else
|
||||
/*
|
||||
ee_printf("Total time (secs): %d\n", time_in_secs(total_time));
|
||||
if (time_in_secs(total_time) > 0)
|
||||
ee_printf("Iterations/Sec : %d\n",
|
||||
default_num_contexts * results[0].iterations
|
||||
/ time_in_secs(total_time));
|
||||
*/
|
||||
#endif
|
||||
|
||||
|
||||
print_coremarks(total_time);
|
||||
|
||||
if (time_in_secs(total_time) < 10)
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR! Must execute for at least 10 secs for a valid result!\n");
|
||||
// total_errors++;
|
||||
}
|
||||
|
||||
/*
|
||||
ee_printf("Iterations : %lu\n",
|
||||
(long unsigned)default_num_contexts * results[0].iterations);
|
||||
ee_printf("Compiler version : %s\n", COMPILER_VERSION);
|
||||
ee_printf("Compiler flags : %s\n", COMPILER_FLAGS);
|
||||
*/
|
||||
|
||||
#if (MULTITHREAD > 1)
|
||||
ee_printf("Parallel %s : %d\n", PARALLEL_METHOD, default_num_contexts);
|
||||
#endif
|
||||
ee_printf("Memory location : %s\n", MEM_LOCATION);
|
||||
/* output for verification */
|
||||
ee_printf("seedcrc : 0x%04x\n", seedcrc);
|
||||
if (results[0].execs & ID_LIST)
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crclist : 0x%04x\n", i, results[i].crclist);
|
||||
if (results[0].execs & ID_MATRIX)
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crcmatrix : 0x%04x\n", i, results[i].crcmatrix);
|
||||
if (results[0].execs & ID_STATE)
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crcstate : 0x%04x\n", i, results[i].crcstate);
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crcfinal : 0x%04x\n", i, results[i].crc);
|
||||
if (total_errors == 0)
|
||||
{
|
||||
ee_printf(
|
||||
"Correct operation validated. See README.md for run and reporting "
|
||||
"rules.\n");
|
||||
#if HAS_FLOAT
|
||||
if (known_id == 3)
|
||||
{
|
||||
ee_printf("CoreMark 1.0 : %f / %s %s",
|
||||
default_num_contexts * results[0].iterations
|
||||
/ time_in_secs(total_time),
|
||||
COMPILER_VERSION,
|
||||
COMPILER_FLAGS);
|
||||
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
|
||||
ee_printf(" / %s", MEM_LOCATION);
|
||||
#else
|
||||
ee_printf(" / %s", mem_name[MEM_METHOD]);
|
||||
#endif
|
||||
|
||||
#if (MULTITHREAD > 1)
|
||||
ee_printf(" / %d:%s", default_num_contexts, PARALLEL_METHOD);
|
||||
#endif
|
||||
ee_printf("\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (total_errors > 0)
|
||||
ee_printf("Errors detected\n");
|
||||
if (total_errors < 0)
|
||||
ee_printf(
|
||||
"Cannot validate operation for these seed values, please compare "
|
||||
"with results on a known platform.\n");
|
||||
|
||||
#if (MEM_METHOD == MEM_MALLOC)
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
portable_free(results[i].memblock[0]);
|
||||
#endif
|
||||
/* And last call any target specific code for finalizing */
|
||||
portable_fini(&(results[0].port));
|
||||
|
||||
return MAIN_RETURN_VAL;
|
||||
}
|
359
FIRMWARE/COREMARK/core_matrix.c
Normal file
359
FIRMWARE/COREMARK/core_matrix.c
Normal file
@@ -0,0 +1,359 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/*
|
||||
Topic: Description
|
||||
Matrix manipulation benchmark
|
||||
|
||||
This very simple algorithm forms the basis of many more complex
|
||||
algorithms.
|
||||
|
||||
The tight inner loop is the focus of many optimizations (compiler as
|
||||
well as hardware based) and is thus relevant for embedded processing.
|
||||
|
||||
The total available data space will be divided to 3 parts:
|
||||
NxN Matrix A - initialized with small values (upper 3/4 of the bits all
|
||||
zero). NxN Matrix B - initialized with medium values (upper half of the bits all
|
||||
zero). NxN Matrix C - used for the result.
|
||||
|
||||
The actual values for A and B must be derived based on input that is not
|
||||
available at compile time.
|
||||
*/
|
||||
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val);
|
||||
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval);
|
||||
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val);
|
||||
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
|
||||
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
|
||||
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
|
||||
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val);
|
||||
|
||||
/* Helper macros for the matrix kernels. Every argument is parenthesised so
   that an expression argument (e.g. `a << b`) cannot regroup with the
   operators of the expansion.
   matrix_test_next - x + 1.
   matrix_clip      - keep the low 8 bits of x when y is non-zero,
                      otherwise the low 16 bits.
   matrix_big       - x with the 0xf000 bits forced on.
   bit_extract      - shift x right by `from` and keep the low `to` bits. */
#define matrix_test_next(x)      ((x) + 1)
#define matrix_clip(x, y)        ((y) ? ((x) & 0x0ff) : ((x) & 0x0ffff))
#define matrix_big(x)            (0xf000 | (x))
#define bit_extract(x, from, to) (((x) >> (from)) & (~(0xffffffff << (to))))
|
||||
|
||||
#if CORE_DEBUG
|
||||
void
|
||||
printmat(MATDAT *A, ee_u32 N, char *name)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
if (j != 0)
|
||||
ee_printf(",");
|
||||
ee_printf("%d", A[i * N + j]);
|
||||
}
|
||||
ee_printf("\n");
|
||||
}
|
||||
}
|
||||
void
|
||||
printmatC(MATRES *C, ee_u32 N, char *name)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
if (j != 0)
|
||||
ee_printf(",");
|
||||
ee_printf("%d", C[i * N + j]);
|
||||
}
|
||||
ee_printf("\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/* Function: core_bench_matrix
|
||||
Benchmark function
|
||||
|
||||
Iterate <matrix_test> N times,
|
||||
changing the matrix values slightly by a constant amount each time.
|
||||
*/
|
||||
ee_u16
|
||||
core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc)
|
||||
{
|
||||
ee_u32 N = p->N;
|
||||
MATRES *C = p->C;
|
||||
MATDAT *A = p->A;
|
||||
MATDAT *B = p->B;
|
||||
MATDAT val = (MATDAT)seed;
|
||||
|
||||
crc = crc16(matrix_test(N, C, A, B, val), crc);
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Function: matrix_test
|
||||
Perform matrix manipulation.
|
||||
|
||||
Parameters:
|
||||
N - Dimensions of the matrix.
|
||||
C - memory for result matrix.
|
||||
A - input matrix
|
||||
B - operator matrix (not changed during operations)
|
||||
|
||||
Returns:
|
||||
A CRC value that captures all results calculated in the function.
|
||||
In particular, crc of the value calculated on the result matrix
|
||||
after each step by <matrix_sum>.
|
||||
|
||||
Operation:
|
||||
|
||||
1 - Add a constant value to all elements of a matrix.
|
||||
2 - Multiply a matrix by a constant.
|
||||
3 - Multiply a matrix by a vector.
|
||||
4 - Multiply a matrix by a matrix.
|
||||
5 - Add a constant value to all elements of a matrix.
|
||||
|
||||
After the last step, matrix A is back to original contents.
|
||||
*/
|
||||
ee_s16
|
||||
matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val)
|
||||
{
|
||||
ee_u16 crc = 0;
|
||||
MATDAT clipval = matrix_big(val);
|
||||
|
||||
matrix_add_const(N, A, val); /* make sure data changes */
|
||||
#if CORE_DEBUG
|
||||
printmat(A, N, "matrix_add_const");
|
||||
#endif
|
||||
matrix_mul_const(N, C, A, val);
|
||||
crc = crc16(matrix_sum(N, C, clipval), crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C, N, "matrix_mul_const");
|
||||
#endif
|
||||
matrix_mul_vect(N, C, A, B);
|
||||
crc = crc16(matrix_sum(N, C, clipval), crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C, N, "matrix_mul_vect");
|
||||
#endif
|
||||
matrix_mul_matrix(N, C, A, B);
|
||||
crc = crc16(matrix_sum(N, C, clipval), crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C, N, "matrix_mul_matrix");
|
||||
#endif
|
||||
matrix_mul_matrix_bitextract(N, C, A, B);
|
||||
crc = crc16(matrix_sum(N, C, clipval), crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C, N, "matrix_mul_matrix_bitextract");
|
||||
#endif
|
||||
|
||||
matrix_add_const(N, A, -val); /* return matrix to initial value */
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Function : matrix_init
|
||||
Initialize the memory block for matrix benchmarking.
|
||||
|
||||
Parameters:
|
||||
blksize - Size of memory to be initialized.
|
||||
memblk - Pointer to memory block.
|
||||
seed - Actual values chosen depend on the seed parameter.
|
||||
p - pointers to <mat_params> containing initialized matrixes.
|
||||
|
||||
Returns:
|
||||
Matrix dimensions.
|
||||
|
||||
Note:
|
||||
The seed parameter MUST be supplied from a source that cannot be
|
||||
determined at compile time
|
||||
*/
|
||||
ee_u32
|
||||
core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p)
|
||||
{
|
||||
ee_u32 N = 0;
|
||||
MATDAT *A;
|
||||
MATDAT *B;
|
||||
ee_s32 order = 1;
|
||||
MATDAT val;
|
||||
ee_u32 i = 0, j = 0;
|
||||
if (seed == 0)
|
||||
seed = 1;
|
||||
while (j < blksize)
|
||||
{
|
||||
i++;
|
||||
j = i * i * 2 * 4;
|
||||
}
|
||||
N = i - 1;
|
||||
A = (MATDAT *)align_mem(memblk);
|
||||
B = A + N * N;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
seed = ((order * seed) % 65536);
|
||||
val = (seed + order);
|
||||
val = matrix_clip(val, 0);
|
||||
B[i * N + j] = val;
|
||||
val = (val + order);
|
||||
val = matrix_clip(val, 1);
|
||||
A[i * N + j] = val;
|
||||
order++;
|
||||
}
|
||||
}
|
||||
|
||||
p->A = A;
|
||||
p->B = B;
|
||||
p->C = (MATRES *)align_mem(B + N * N);
|
||||
p->N = N;
|
||||
#if CORE_DEBUG
|
||||
printmat(A, N, "A");
|
||||
printmat(B, N, "B");
|
||||
#endif
|
||||
return N;
|
||||
}
|
||||
|
||||
/* Function: matrix_sum
|
||||
Calculate a function that depends on the values of elements in the
|
||||
matrix.
|
||||
|
||||
For each element, accumulate into a temporary variable.
|
||||
|
||||
As long as this value is under the parameter clipval,
|
||||
add 1 to the result if the element is bigger then the previous.
|
||||
|
||||
Otherwise, reset the accumulator and add 10 to the result.
|
||||
*/
|
||||
ee_s16
|
||||
matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval)
|
||||
{
|
||||
MATRES tmp = 0, prev = 0, cur = 0;
|
||||
ee_s16 ret = 0;
|
||||
ee_u32 i, j;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
cur = C[i * N + j];
|
||||
tmp += cur;
|
||||
if (tmp > clipval)
|
||||
{
|
||||
ret += 10;
|
||||
tmp = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
ret += (cur > prev) ? 1 : 0;
|
||||
}
|
||||
prev = cur;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_const
|
||||
Multiply a matrix by a constant.
|
||||
This could be used as a scaler for instance.
|
||||
*/
|
||||
void
|
||||
matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
C[i * N + j] = (MATRES)A[i * N + j] * (MATRES)val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_add_const
|
||||
Add a constant value to all elements of a matrix.
|
||||
*/
|
||||
void
|
||||
matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
A[i * N + j] += val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_vect
|
||||
Multiply a matrix by a vector.
|
||||
This is common in many simple filters (e.g. fir where a vector of
|
||||
coefficients is applied to the matrix.)
|
||||
*/
|
||||
void
|
||||
matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
C[i] = 0;
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
C[i] += (MATRES)A[i * N + j] * (MATRES)B[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_matrix
|
||||
Multiply a matrix by a matrix.
|
||||
Basic code is used in many algorithms, mostly with minor changes such as
|
||||
scaling.
|
||||
*/
|
||||
void
|
||||
matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
|
||||
{
|
||||
ee_u32 i, j, k;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
C[i * N + j] = 0;
|
||||
for (k = 0; k < N; k++)
|
||||
{
|
||||
C[i * N + j] += (MATRES)A[i * N + k] * (MATRES)B[k * N + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_matrix_bitextract
|
||||
Multiply a matrix by a matrix, and extract some bits from the result.
|
||||
Basic code is used in many algorithms, mostly with minor changes such as
|
||||
scaling.
|
||||
*/
|
||||
void
|
||||
matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
|
||||
{
|
||||
ee_u32 i, j, k;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
C[i * N + j] = 0;
|
||||
for (k = 0; k < N; k++)
|
||||
{
|
||||
MATRES tmp = (MATRES)A[i * N + k] * (MATRES)B[k * N + j];
|
||||
C[i * N + j] += bit_extract(tmp, 2, 4) * bit_extract(tmp, 5, 7);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
215
FIRMWARE/COREMARK/core_portme.c
Normal file
215
FIRMWARE/COREMARK/core_portme.c
Normal file
@@ -0,0 +1,215 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
#include <io.h>
|
||||
#include <stdio.h>
|
||||
#include "coremark.h"
|
||||
#include "core_portme.h"
|
||||
#include <perf.h>
|
||||
|
||||
#if VALIDATION_RUN
|
||||
volatile ee_s32 seed1_volatile = 0x3415;
|
||||
volatile ee_s32 seed2_volatile = 0x3415;
|
||||
volatile ee_s32 seed3_volatile = 0x66;
|
||||
#endif
|
||||
#if PERFORMANCE_RUN
|
||||
volatile ee_s32 seed1_volatile = 0x0;
|
||||
volatile ee_s32 seed2_volatile = 0x0;
|
||||
volatile ee_s32 seed3_volatile = 0x66;
|
||||
#endif
|
||||
#if PROFILE_RUN
|
||||
volatile ee_s32 seed1_volatile = 0x8;
|
||||
volatile ee_s32 seed2_volatile = 0x8;
|
||||
volatile ee_s32 seed3_volatile = 0x8;
|
||||
#endif
|
||||
volatile ee_s32 seed4_volatile = ITERATIONS;
|
||||
volatile ee_s32 seed5_volatile = 0;
|
||||
|
||||
/* Porting : Timing functions
|
||||
How to capture time and convert to seconds must be ported to whatever is
|
||||
supported by the platform. e.g. Read value from on board RTC, read value from
|
||||
cpu clock cycles performance counter etc. Sample implementation for standard
|
||||
time.h and windows.h definitions included.
|
||||
*/
|
||||
CORETIMETYPE barebones_clock()
|
||||
{
|
||||
return (CORETIMETYPE)(rdcycle());
|
||||
}
|
||||
|
||||
/* Define : TIMER_RES_DIVIDER
|
||||
Divider to trade off timer resolution and total time that can be
|
||||
measured.
|
||||
|
||||
Use lower values to increase resolution, but make sure that overflow
|
||||
does not occur. If there are issues with the return value overflowing,
|
||||
increase this value.
|
||||
*/
|
||||
#define CLOCKS_PER_SEC 10000000
|
||||
#define GETMYTIME(_t) (*_t = barebones_clock())
|
||||
#define MYTIMEDIFF(fin, ini) ((fin) - (ini))
|
||||
#define TIMER_RES_DIVIDER 1
|
||||
#define SAMPLE_TIME_IMPLEMENTATION 1
|
||||
#define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER)
|
||||
|
||||
/** Define Host specific (POSIX), or target specific global time variables. */
|
||||
static CORETIMETYPE start_time_val, stop_time_val;
|
||||
|
||||
/* Function : start_time
|
||||
This function will be called right before starting the timed portion of
|
||||
the benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the
|
||||
example code) or zeroing some system parameters - e.g. setting the cpu clocks
|
||||
cycles to 0.
|
||||
*/
|
||||
void
|
||||
start_time(void)
|
||||
{
|
||||
GETMYTIME(&start_time_val);
|
||||
}
|
||||
/* Function : stop_time
|
||||
This function will be called right after ending the timed portion of the
|
||||
benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the
|
||||
example code) or other system parameters - e.g. reading the current value of
|
||||
cpu cycles counter.
|
||||
*/
|
||||
void
|
||||
stop_time(void)
|
||||
{
|
||||
GETMYTIME(&stop_time_val);
|
||||
}
|
||||
/* Function : get_time
|
||||
Return an abstract "ticks" number that signifies time on the system.
|
||||
|
||||
Actual value returned may be cpu cycles, milliseconds or any other
|
||||
value, as long as it can be converted to seconds by <time_in_secs>. This
|
||||
methodology is taken to accommodate any hardware or simulated platform. The
|
||||
sample implementation returns millisecs by default, and the resolution is
|
||||
controlled by <TIMER_RES_DIVIDER>
|
||||
*/
|
||||
CORE_TICKS
|
||||
get_time(void)
|
||||
{
|
||||
CORE_TICKS elapsed
|
||||
= (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
|
||||
return elapsed;
|
||||
}
|
||||
/* Function : time_in_secs
|
||||
Convert the value returned by get_time to seconds.
|
||||
|
||||
The <secs_ret> type is used to accommodate systems with no support for
|
||||
floating point. Default implementation implemented by the EE_TICKS_PER_SEC
|
||||
macro above.
|
||||
*/
|
||||
secs_ret
|
||||
time_in_secs(CORE_TICKS ticks)
|
||||
{
|
||||
secs_ret retval = ((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
|
||||
return retval;
|
||||
}
|
||||
|
||||
ee_u32 default_num_contexts = 1;
|
||||
|
||||
/* Function : portable_init
|
||||
Target specific initialization code
|
||||
Test for some common mistakes.
|
||||
*/
|
||||
void
|
||||
portable_init(core_portable *p, int *argc, char *argv[])
|
||||
{
|
||||
//usleep(100);
|
||||
//io.led = 0xF;
|
||||
|
||||
// ee_printf("board: %s (id=%d)\n",board_name(io.board_id),io.board_id);
|
||||
ee_printf("build: %s for %s\n",BUILD,ARCH);
|
||||
|
||||
// ee_printf("core%d: ", io.core_id); // core id
|
||||
// ee_printf("darkriscv@%dMHz with: ",io.board_cm*2); // board clock MHz
|
||||
// ee_printf("rv32%s ", check4rv32i()?"i":"e"); // architecture
|
||||
ee_printf("\n");
|
||||
// ee_printf("uart0: 115200 bps (div=%d)\n",io.uart.baud);
|
||||
// ee_printf("timr0: frequency=%dHz (io.timer=%d)\n",(io.board_cm*2000000u)/(io.timer+1),io.timer);
|
||||
|
||||
ee_printf("\n\n");
|
||||
|
||||
// ee_printf("CoreMark start in %d us.\n",io.timeus);
|
||||
|
||||
// #error "Call board initialization routines in portable init (if needed), in particular initialize UART!\n"
|
||||
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *))
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR! Please define ee_ptr_int to a type that holds a "
|
||||
"pointer!\n");
|
||||
}
|
||||
if (sizeof(ee_u32) != 4)
|
||||
{
|
||||
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
|
||||
}
|
||||
p->portable_id = 1;
|
||||
}
|
||||
|
||||
|
||||
// Print a "fixed point" number: kx is the value multiplied by 1000, and it
// is written as <integer part>.<three-digit fraction>.
void printk(uint64_t kx) {
    const int whole    = (int)(kx / 1000);
    const int fraction = (int)(kx % 1000);
    // Number of leading zeros needed to pad the fraction to three digits;
    // the padding is emitted by hand instead of relying on a width specifier.
    int       zeros    = (fraction < 10) ? 2 : ((fraction < 100) ? 1 : 0);

    printf("%d.",whole);
    while (zeros-- > 0) {
        printf("0");
    }
    printf("%d",fraction);
}
|
||||
|
||||
|
||||
void print_coremarks(uint64_t ticks) {
|
||||
const uint64_t MHz = CLOCKS_PER_SEC/1000000;
|
||||
// printf("*** MHz : %d\n",(int)MHz);
|
||||
printf("*** Ticks : %d\n",(int)ticks);
|
||||
uint64_t ksecs=ticks/(CLOCKS_PER_SEC/1000);
|
||||
// printf("*** Time : "); printk(ksecs); printf("\n");
|
||||
uint64_t kiter_per_sec= (uint64_t)(ITERATIONS*1000*1000)/ksecs;
|
||||
// printf("*** Iter/s : "); printk(kiter_per_sec); printf("\n");
|
||||
printf("*** Coremark/MHz : "); printk(kiter_per_sec/MHz); printf("\n");
|
||||
|
||||
uint64_t kticks2 = rdcycle() * (uint64_t)1000;
|
||||
uint64_t instret2 = rdinstret();
|
||||
printf("*** CPI (2) : "); printk(kticks2/instret2); printf("\n");
|
||||
}
|
||||
|
||||
/* Function : portable_fini
|
||||
Target specific final code
|
||||
*/
|
||||
void
|
||||
portable_fini(core_portable *p)
|
||||
{
|
||||
//io.led = 0;
|
||||
//ee_printf("CoreMark finish in %d us.\n\n",io.timeus);
|
||||
p->portable_id = 0;
|
||||
|
||||
// makes no sense return here!
|
||||
|
||||
//while(1)
|
||||
//{
|
||||
// usleep(500000);
|
||||
// io.led++;
|
||||
//}
|
||||
}
|
225
FIRMWARE/COREMARK/core_portme.h
Normal file
225
FIRMWARE/COREMARK/core_portme.h
Normal file
@@ -0,0 +1,225 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#define ITERATIONS 300
|
||||
#define BUILD "testbench"
|
||||
#define ARCH "petituyau"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/* Topic : Description
|
||||
This file contains configuration constants required to execute on
|
||||
different platforms
|
||||
*/
|
||||
#ifndef CORE_PORTME_H
|
||||
#define CORE_PORTME_H
|
||||
/************************/
|
||||
/* Data types and settings */
|
||||
/************************/
|
||||
/* Configuration : HAS_FLOAT
|
||||
Define to 1 if the platform supports floating point.
|
||||
*/
|
||||
#ifndef HAS_FLOAT
|
||||
#define HAS_FLOAT 0
|
||||
#endif
|
||||
/* Configuration : HAS_TIME_H
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef HAS_TIME_H
|
||||
#define HAS_TIME_H 0
|
||||
#endif
|
||||
/* Configuration : USE_CLOCK
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef USE_CLOCK
|
||||
#define USE_CLOCK 0
|
||||
#endif
|
||||
/* Configuration : HAS_STDIO
|
||||
Define to 1 if the platform has stdio.h.
|
||||
*/
|
||||
#ifndef HAS_STDIO
|
||||
#define HAS_STDIO 0
|
||||
#endif
|
||||
/* Configuration : HAS_PRINTF
|
||||
Define to 1 if the platform has stdio.h and implements the printf
|
||||
function.
|
||||
*/
|
||||
#ifndef HAS_PRINTF
|
||||
#define HAS_PRINTF 0
|
||||
#endif
|
||||
|
||||
/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
|
||||
Initialize these strings per platform
|
||||
*/
|
||||
#ifndef COMPILER_VERSION
|
||||
#ifdef __GNUC__
|
||||
#define COMPILER_VERSION "GCC"__VERSION__
|
||||
#else
|
||||
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
|
||||
#endif
|
||||
#endif
|
||||
#ifndef COMPILER_FLAGS
|
||||
#define COMPILER_FLAGS "-O2"
|
||||
#endif
|
||||
#ifndef MEM_LOCATION
|
||||
#define MEM_LOCATION "STACK"
|
||||
#endif
|
||||
|
||||
/* Data Types :
        To avoid compiler issues, define the data types that need to be used for
        8b, 16b and 32b in <core_portme.h>.

        *Important* :
        ee_ptr_int needs to be the data type used to hold pointers, otherwise
        coremark may fail!!!
*/
typedef signed short   ee_s16;
typedef unsigned short ee_u16;
typedef signed int     ee_s32;
typedef double         ee_f32;
typedef unsigned char  ee_u8;
typedef unsigned int   ee_u32;
typedef ee_u32         ee_ptr_int;
typedef size_t         ee_size_t;
/* <stddef.h> is included above and already provides NULL; only define it
   here when it is missing, to avoid redefining a standard macro. */
#ifndef NULL
#define NULL ((void *)0)
#endif
|
||||
/* align_mem :
|
||||
This macro is used to align an offset to point to a 32b value. It is
|
||||
used in the Matrix algorithm to initialize the input memory blocks.
|
||||
*/
|
||||
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3))
|
||||
|
||||
/* Configuration : CORE_TICKS
|
||||
Define type of return from the timing functions.
|
||||
*/
|
||||
//#define CORETIMETYPE ee_u32
|
||||
//typedef ee_u32 CORE_TICKS;
|
||||
|
||||
#define CORETIMETYPE uint64_t
|
||||
typedef uint64_t CORE_TICKS;
|
||||
|
||||
|
||||
/* Configuration : SEED_METHOD
|
||||
Defines method to get seed values that cannot be computed at compile
|
||||
time.
|
||||
|
||||
Valid values :
|
||||
SEED_ARG - from command line.
|
||||
SEED_FUNC - from a system function.
|
||||
SEED_VOLATILE - from volatile variables.
|
||||
*/
|
||||
#ifndef SEED_METHOD
|
||||
#define SEED_METHOD SEED_VOLATILE
|
||||
#endif
|
||||
|
||||
/* Configuration : MEM_METHOD
|
||||
Defines method to get a block of memory.
|
||||
|
||||
Valid values :
|
||||
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
|
||||
MEM_STATIC - to use a static memory array.
|
||||
MEM_STACK - to allocate the data block on the stack (NYI).
|
||||
*/
|
||||
#ifndef MEM_METHOD
|
||||
#define MEM_METHOD MEM_STACK
|
||||
#endif
|
||||
|
||||
/* Configuration : MULTITHREAD
|
||||
Define for parallel execution
|
||||
|
||||
Valid values :
|
||||
1 - only one context (default).
|
||||
N>1 - will execute N copies in parallel.
|
||||
|
||||
Note :
|
||||
If this flag is defined to more than 1, an implementation for launching
|
||||
parallel contexts must be defined.
|
||||
|
||||
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK>
|
||||
to enable them.
|
||||
|
||||
It is valid to have a different implementation of <core_start_parallel>
|
||||
and <core_end_parallel> in <core_portme.c>, to fit a particular architecture.
|
||||
*/
|
||||
#ifndef MULTITHREAD
|
||||
#define MULTITHREAD 1
|
||||
#define USE_PTHREAD 0
|
||||
#define USE_FORK 0
|
||||
#define USE_SOCKET 0
|
||||
#endif
|
||||
|
||||
/* Configuration : MAIN_HAS_NOARGC
|
||||
Needed if platform does not support getting arguments to main.
|
||||
|
||||
Valid values :
|
||||
0 - argc/argv to main is supported
|
||||
1 - argc/argv to main is not supported
|
||||
|
||||
Note :
|
||||
This flag only matters if MULTITHREAD has been defined to a value
|
||||
greater than 1.
|
||||
*/
|
||||
#ifndef MAIN_HAS_NOARGC
|
||||
#define MAIN_HAS_NOARGC 1
|
||||
#endif
|
||||
|
||||
/* Configuration : MAIN_HAS_NORETURN
|
||||
Needed if platform does not support returning a value from main.
|
||||
|
||||
Valid values :
|
||||
0 - main returns an int, and return value will be 0.
|
||||
1 - platform does not support returning a value from main
|
||||
*/
|
||||
#ifndef MAIN_HAS_NORETURN
|
||||
#define MAIN_HAS_NORETURN 0
|
||||
#endif
|
||||
|
||||
/* Variable : default_num_contexts
|
||||
Not used for this simple port, must contain the value 1.
|
||||
*/
|
||||
extern ee_u32 default_num_contexts;
|
||||
|
||||
typedef struct CORE_PORTABLE_S
|
||||
{
|
||||
ee_u8 portable_id;
|
||||
} core_portable;
|
||||
|
||||
/* target specific init/fini */
|
||||
void portable_init(core_portable *p, int *argc, char *argv[]);
|
||||
void portable_fini(core_portable *p);
|
||||
|
||||
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) \
|
||||
&& !defined(VALIDATION_RUN)
|
||||
#if (TOTAL_DATA_SIZE == 1200)
|
||||
#define PROFILE_RUN 1
|
||||
#elif (TOTAL_DATA_SIZE == 2000)
|
||||
#define PERFORMANCE_RUN 1
|
||||
#else
|
||||
#define VALIDATION_RUN 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int ee_printf(const char *fmt, ...);
|
||||
void print_coremarks(uint64_t ticks);
|
||||
|
||||
#endif /* CORE_PORTME_H */
|
||||
|
330
FIRMWARE/COREMARK/core_state.c
Normal file
330
FIRMWARE/COREMARK/core_state.c
Normal file
@@ -0,0 +1,330 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/* local functions */
|
||||
enum CORE_STATE core_state_transition(ee_u8 **instr, ee_u32 *transition_count);
|
||||
|
||||
/*
|
||||
Topic: Description
|
||||
Simple state machines like this one are used in many embedded products.
|
||||
|
||||
For more complex state machines, sometimes a state transition table
|
||||
implementation is used instead, trading speed of direct coding for ease of
|
||||
maintenance.
|
||||
|
||||
Since the main goal of using a state machine in CoreMark is to exercise
|
||||
the switch/if behaviour, we are using a small moore machine.
|
||||
|
||||
In particular, this machine tests type of string input,
|
||||
trying to determine whether the input is a number or something else.
|
||||
(see core_state.png).
|
||||
*/
|
||||
|
||||
/* Function: core_bench_state
|
||||
Benchmark function
|
||||
|
||||
Go over the input twice, once direct, and once after introducing some
|
||||
corruption.
|
||||
*/
|
||||
ee_u16
|
||||
core_bench_state(ee_u32 blksize,
|
||||
ee_u8 *memblock,
|
||||
ee_s16 seed1,
|
||||
ee_s16 seed2,
|
||||
ee_s16 step,
|
||||
ee_u16 crc)
|
||||
{
|
||||
ee_u32 final_counts[NUM_CORE_STATES];
|
||||
ee_u32 track_counts[NUM_CORE_STATES];
|
||||
ee_u8 *p = memblock;
|
||||
ee_u32 i;
|
||||
|
||||
#if CORE_DEBUG
|
||||
ee_printf("State Bench: %d,%d,%d,%04x\n", seed1, seed2, step, crc);
|
||||
#endif
|
||||
for (i = 0; i < NUM_CORE_STATES; i++)
|
||||
{
|
||||
final_counts[i] = track_counts[i] = 0;
|
||||
}
|
||||
/* run the state machine over the input */
|
||||
while (*p != 0)
|
||||
{
|
||||
enum CORE_STATE fstate = core_state_transition(&p, track_counts);
|
||||
final_counts[fstate]++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("%d,", fstate);
|
||||
}
|
||||
ee_printf("\n");
|
||||
#else
|
||||
}
|
||||
#endif
|
||||
p = memblock;
|
||||
while (p < (memblock + blksize))
|
||||
{ /* insert some corruption */
|
||||
if (*p != ',')
|
||||
*p ^= (ee_u8)seed1;
|
||||
p += step;
|
||||
}
|
||||
p = memblock;
|
||||
/* run the state machine over the input again */
|
||||
while (*p != 0)
|
||||
{
|
||||
enum CORE_STATE fstate = core_state_transition(&p, track_counts);
|
||||
final_counts[fstate]++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("%d,", fstate);
|
||||
}
|
||||
ee_printf("\n");
|
||||
#else
|
||||
}
|
||||
#endif
|
||||
p = memblock;
|
||||
while (p < (memblock + blksize))
|
||||
{ /* undo corruption is seed1 and seed2 are equal */
|
||||
if (*p != ',')
|
||||
*p ^= (ee_u8)seed2;
|
||||
p += step;
|
||||
}
|
||||
/* end timing */
|
||||
for (i = 0; i < NUM_CORE_STATES; i++)
|
||||
{
|
||||
crc = crcu32(final_counts[i], crc);
|
||||
crc = crcu32(track_counts[i], crc);
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Default initialization patterns */
|
||||
static ee_u8 *intpat[4]
|
||||
= { (ee_u8 *)"5012", (ee_u8 *)"1234", (ee_u8 *)"-874", (ee_u8 *)"+122" };
|
||||
static ee_u8 *floatpat[4] = { (ee_u8 *)"35.54400",
|
||||
(ee_u8 *)".1234500",
|
||||
(ee_u8 *)"-110.700",
|
||||
(ee_u8 *)"+0.64400" };
|
||||
static ee_u8 *scipat[4] = { (ee_u8 *)"5.500e+3",
|
||||
(ee_u8 *)"-.123e-2",
|
||||
(ee_u8 *)"-87e+832",
|
||||
(ee_u8 *)"+0.6e-12" };
|
||||
static ee_u8 *errpat[4] = { (ee_u8 *)"T0.3e-1F",
|
||||
(ee_u8 *)"-T.T++Tq",
|
||||
(ee_u8 *)"1T3.4e4z",
|
||||
(ee_u8 *)"34.0e-T^" };
|
||||
|
||||
/* Function: core_init_state
|
||||
Initialize the input data for the state machine.
|
||||
|
||||
Populate the input with several predetermined strings, interspersed.
|
||||
Actual patterns chosen depend on the seed parameter.
|
||||
|
||||
Note:
|
||||
The seed parameter MUST be supplied from a source that cannot be
|
||||
determined at compile time
|
||||
*/
|
||||
void
|
||||
core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p)
|
||||
{
|
||||
ee_u32 total = 0, next = 0, i;
|
||||
ee_u8 *buf = 0;
|
||||
#if CORE_DEBUG
|
||||
ee_u8 *start = p;
|
||||
ee_printf("State: %d,%d\n", size, seed);
|
||||
#endif
|
||||
size--;
|
||||
next = 0;
|
||||
while ((total + next + 1) < size)
|
||||
{
|
||||
if (next > 0)
|
||||
{
|
||||
for (i = 0; i < next; i++)
|
||||
*(p + total + i) = buf[i];
|
||||
*(p + total + i) = ',';
|
||||
total += next + 1;
|
||||
}
|
||||
seed++;
|
||||
switch (seed & 0x7)
|
||||
{
|
||||
case 0: /* int */
|
||||
case 1: /* int */
|
||||
case 2: /* int */
|
||||
buf = intpat[(seed >> 3) & 0x3];
|
||||
next = 4;
|
||||
break;
|
||||
case 3: /* float */
|
||||
case 4: /* float */
|
||||
buf = floatpat[(seed >> 3) & 0x3];
|
||||
next = 8;
|
||||
break;
|
||||
case 5: /* scientific */
|
||||
case 6: /* scientific */
|
||||
buf = scipat[(seed >> 3) & 0x3];
|
||||
next = 8;
|
||||
break;
|
||||
case 7: /* invalid */
|
||||
buf = errpat[(seed >> 3) & 0x3];
|
||||
next = 8;
|
||||
break;
|
||||
default: /* Never happen, just to make some compilers happy */
|
||||
break;
|
||||
}
|
||||
}
|
||||
size++;
|
||||
while (total < size)
|
||||
{ /* fill the rest with 0 */
|
||||
*(p + total) = 0;
|
||||
total++;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("State Input: %s\n", start);
|
||||
#endif
|
||||
}
|
||||
|
||||
static ee_u8
|
||||
ee_isdigit(ee_u8 c)
|
||||
{
|
||||
ee_u8 retval;
|
||||
retval = ((c >= '0') & (c <= '9')) ? 1 : 0;
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* Function: core_state_transition
|
||||
Actual state machine.
|
||||
|
||||
The state machine will continue scanning until either:
|
||||
1 - an invalid input is detected.
|
||||
2 - a valid number has been detected.
|
||||
|
||||
The input pointer is updated to point to the end of the token, and the
|
||||
end state is returned (either specific format determined or invalid).
|
||||
*/
|
||||
|
||||
enum CORE_STATE
|
||||
core_state_transition(ee_u8 **instr, ee_u32 *transition_count)
|
||||
{
|
||||
ee_u8 * str = *instr;
|
||||
ee_u8 NEXT_SYMBOL;
|
||||
enum CORE_STATE state = CORE_START;
|
||||
for (; *str && state != CORE_INVALID; str++)
|
||||
{
|
||||
NEXT_SYMBOL = *str;
|
||||
if (NEXT_SYMBOL == ',') /* end of this input */
|
||||
{
|
||||
str++;
|
||||
break;
|
||||
}
|
||||
switch (state)
|
||||
{
|
||||
case CORE_START:
|
||||
if (ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INT;
|
||||
}
|
||||
else if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
|
||||
{
|
||||
state = CORE_S1;
|
||||
}
|
||||
else if (NEXT_SYMBOL == '.')
|
||||
{
|
||||
state = CORE_FLOAT;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INVALID]++;
|
||||
}
|
||||
transition_count[CORE_START]++;
|
||||
break;
|
||||
case CORE_S1:
|
||||
if (ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INT;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
else if (NEXT_SYMBOL == '.')
|
||||
{
|
||||
state = CORE_FLOAT;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
break;
|
||||
case CORE_INT:
|
||||
if (NEXT_SYMBOL == '.')
|
||||
{
|
||||
state = CORE_FLOAT;
|
||||
transition_count[CORE_INT]++;
|
||||
}
|
||||
else if (!ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_FLOAT:
|
||||
if (NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e')
|
||||
{
|
||||
state = CORE_S2;
|
||||
transition_count[CORE_FLOAT]++;
|
||||
}
|
||||
else if (!ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_FLOAT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_S2:
|
||||
if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
|
||||
{
|
||||
state = CORE_EXPONENT;
|
||||
transition_count[CORE_S2]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_S2]++;
|
||||
}
|
||||
break;
|
||||
case CORE_EXPONENT:
|
||||
if (ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_SCIENTIFIC;
|
||||
transition_count[CORE_EXPONENT]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_EXPONENT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_SCIENTIFIC:
|
||||
if (!ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INVALID]++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
*instr = str;
|
||||
return state;
|
||||
}
|
249
FIRMWARE/COREMARK/core_util.c
Normal file
249
FIRMWARE/COREMARK/core_util.c
Normal file
@@ -0,0 +1,249 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/* Function: get_seed
|
||||
Get a value that cannot be determined at compile time.
|
||||
|
||||
Since different embedded systems and compilers are used, 3 different
|
||||
methods are provided: 1 - Using a volatile variable. This method is only
|
||||
valid if the compiler is forced to generate code that reads the value of a
|
||||
volatile variable from memory at run time. Please note, if using this method,
|
||||
you would need to modify core_portme.c to generate training profile. 2 -
|
||||
Command line arguments. This is the preferred method if command line
|
||||
arguments are supported. 3 - System function. If none of the first 2 methods
|
||||
is available on the platform, a system function which is not a stub can be
|
||||
used.
|
||||
|
||||
e.g. read the value on GPIO pins connected to switches, or invoke
|
||||
special simulator functions.
|
||||
*/
|
||||
#if (SEED_METHOD == SEED_VOLATILE)
|
||||
extern volatile ee_s32 seed1_volatile;
|
||||
extern volatile ee_s32 seed2_volatile;
|
||||
extern volatile ee_s32 seed3_volatile;
|
||||
extern volatile ee_s32 seed4_volatile;
|
||||
extern volatile ee_s32 seed5_volatile;
|
||||
ee_s32
|
||||
get_seed_32(int i)
|
||||
{
|
||||
ee_s32 retval;
|
||||
switch (i)
|
||||
{
|
||||
case 1:
|
||||
retval = seed1_volatile;
|
||||
break;
|
||||
case 2:
|
||||
retval = seed2_volatile;
|
||||
break;
|
||||
case 3:
|
||||
retval = seed3_volatile;
|
||||
break;
|
||||
case 4:
|
||||
retval = seed4_volatile;
|
||||
break;
|
||||
case 5:
|
||||
retval = seed5_volatile;
|
||||
break;
|
||||
default:
|
||||
retval = 0;
|
||||
break;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
#elif (SEED_METHOD == SEED_ARG)
|
||||
ee_s32
|
||||
parseval(char *valstring)
|
||||
{
|
||||
ee_s32 retval = 0;
|
||||
ee_s32 neg = 1;
|
||||
int hexmode = 0;
|
||||
if (*valstring == '-')
|
||||
{
|
||||
neg = -1;
|
||||
valstring++;
|
||||
}
|
||||
if ((valstring[0] == '0') && (valstring[1] == 'x'))
|
||||
{
|
||||
hexmode = 1;
|
||||
valstring += 2;
|
||||
}
|
||||
/* first look for digits */
|
||||
if (hexmode)
|
||||
{
|
||||
while (((*valstring >= '0') && (*valstring <= '9'))
|
||||
|| ((*valstring >= 'a') && (*valstring <= 'f')))
|
||||
{
|
||||
ee_s32 digit = *valstring - '0';
|
||||
if (digit > 9)
|
||||
digit = 10 + *valstring - 'a';
|
||||
retval *= 16;
|
||||
retval += digit;
|
||||
valstring++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while ((*valstring >= '0') && (*valstring <= '9'))
|
||||
{
|
||||
ee_s32 digit = *valstring - '0';
|
||||
retval *= 10;
|
||||
retval += digit;
|
||||
valstring++;
|
||||
}
|
||||
}
|
||||
/* now add qualifiers */
|
||||
if (*valstring == 'K')
|
||||
retval *= 1024;
|
||||
if (*valstring == 'M')
|
||||
retval *= 1024 * 1024;
|
||||
|
||||
retval *= neg;
|
||||
return retval;
|
||||
}
|
||||
|
||||
ee_s32
|
||||
get_seed_args(int i, int argc, char *argv[])
|
||||
{
|
||||
if (argc > i)
|
||||
return parseval(argv[i]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif (SEED_METHOD == SEED_FUNC)
|
||||
/* If using OS based function, you must define and implement the functions below
|
||||
* in core_portme.h and core_portme.c ! */
|
||||
ee_s32
|
||||
get_seed_32(int i)
|
||||
{
|
||||
ee_s32 retval;
|
||||
switch (i)
|
||||
{
|
||||
case 1:
|
||||
retval = portme_sys1();
|
||||
break;
|
||||
case 2:
|
||||
retval = portme_sys2();
|
||||
break;
|
||||
case 3:
|
||||
retval = portme_sys3();
|
||||
break;
|
||||
case 4:
|
||||
retval = portme_sys4();
|
||||
break;
|
||||
case 5:
|
||||
retval = portme_sys5();
|
||||
break;
|
||||
default:
|
||||
retval = 0;
|
||||
break;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Function: crc*
|
||||
Service functions to calculate 16b CRC code.
|
||||
|
||||
*/
|
||||
ee_u16
|
||||
crcu8(ee_u8 data, ee_u16 crc)
|
||||
{
|
||||
ee_u8 i = 0, x16 = 0, carry = 0;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1));
|
||||
data >>= 1;
|
||||
|
||||
if (x16 == 1)
|
||||
{
|
||||
crc ^= 0x4002;
|
||||
carry = 1;
|
||||
}
|
||||
else
|
||||
carry = 0;
|
||||
crc >>= 1;
|
||||
if (carry)
|
||||
crc |= 0x8000;
|
||||
else
|
||||
crc &= 0x7fff;
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
ee_u16
|
||||
crcu16(ee_u16 newval, ee_u16 crc)
|
||||
{
|
||||
crc = crcu8((ee_u8)(newval), crc);
|
||||
crc = crcu8((ee_u8)((newval) >> 8), crc);
|
||||
return crc;
|
||||
}
|
||||
ee_u16
|
||||
crcu32(ee_u32 newval, ee_u16 crc)
|
||||
{
|
||||
crc = crc16((ee_s16)newval, crc);
|
||||
crc = crc16((ee_s16)(newval >> 16), crc);
|
||||
return crc;
|
||||
}
|
||||
ee_u16
|
||||
crc16(ee_s16 newval, ee_u16 crc)
|
||||
{
|
||||
return crcu16((ee_u16)newval, crc);
|
||||
}
|
||||
|
||||
ee_u8
|
||||
check_data_types()
|
||||
{
|
||||
ee_u8 retval = 0;
|
||||
if (sizeof(ee_u8) != 1)
|
||||
{
|
||||
ee_printf("ERROR: ee_u8 is not an 8b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_u16) != 2)
|
||||
{
|
||||
ee_printf("ERROR: ee_u16 is not a 16b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_s16) != 2)
|
||||
{
|
||||
ee_printf("ERROR: ee_s16 is not a 16b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_s32) != 4)
|
||||
{
|
||||
ee_printf("ERROR: ee_s32 is not a 32b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_u32) != 4)
|
||||
{
|
||||
ee_printf("ERROR: ee_u32 is not a 32b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_ptr_int) != sizeof(int *))
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n");
|
||||
retval++;
|
||||
}
|
||||
if (retval > 0)
|
||||
{
|
||||
ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n");
|
||||
}
|
||||
return retval;
|
||||
}
|
184
FIRMWARE/COREMARK/coremark.h
Normal file
184
FIRMWARE/COREMARK/coremark.h
Normal file
@@ -0,0 +1,184 @@
|
||||
#pragma once
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
/* Topic: Description
|
||||
This file contains declarations of the various benchmark functions.
|
||||
*/
|
||||
|
||||
/* Configuration: TOTAL_DATA_SIZE
        Define total size for data algorithms will operate on.

        The default is parenthesized so that the macro expands to a single
        value wherever it is used (e.g. next to '/' or '%'), instead of the
        bare expression 2 * 1000.
*/
#ifndef TOTAL_DATA_SIZE
#define TOTAL_DATA_SIZE (2 * 1000)
#endif
|
||||
|
||||
#define SEED_ARG 0
|
||||
#define SEED_FUNC 1
|
||||
#define SEED_VOLATILE 2
|
||||
|
||||
#define MEM_STATIC 0
|
||||
#define MEM_MALLOC 1
|
||||
#define MEM_STACK 2
|
||||
|
||||
#include "core_portme.h"
|
||||
|
||||
#if HAS_STDIO
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
#if HAS_PRINTF
|
||||
#define ee_printf printf
|
||||
#endif
|
||||
|
||||
/* Actual benchmark execution in iterate */
|
||||
void *iterate(void *pres);
|
||||
|
||||
/* Typedef: secs_ret
|
||||
For machines that have floating point support, get number of seconds as
|
||||
a double. Otherwise an unsigned int.
|
||||
*/
|
||||
#if HAS_FLOAT
|
||||
typedef double secs_ret;
|
||||
#else
|
||||
typedef ee_u32 secs_ret;
|
||||
#endif
|
||||
|
||||
#if MAIN_HAS_NORETURN
|
||||
#define MAIN_RETURN_VAL
|
||||
#define MAIN_RETURN_TYPE void
|
||||
#else
|
||||
#define MAIN_RETURN_VAL 0
|
||||
#define MAIN_RETURN_TYPE int
|
||||
#endif
|
||||
|
||||
void start_time(void);
|
||||
void stop_time(void);
|
||||
CORE_TICKS get_time(void);
|
||||
secs_ret time_in_secs(CORE_TICKS ticks);
|
||||
|
||||
/* Misc useful functions */
|
||||
ee_u16 crcu8(ee_u8 data, ee_u16 crc);
|
||||
ee_u16 crc16(ee_s16 newval, ee_u16 crc);
|
||||
ee_u16 crcu16(ee_u16 newval, ee_u16 crc);
|
||||
ee_u16 crcu32(ee_u32 newval, ee_u16 crc);
|
||||
ee_u8 check_data_types(void);
|
||||
void * portable_malloc(ee_size_t size);
|
||||
void portable_free(void *p);
|
||||
ee_s32 parseval(char *valstring);
|
||||
|
||||
/* Algorithm IDS */
|
||||
#define ID_LIST (1 << 0)
|
||||
#define ID_MATRIX (1 << 1)
|
||||
#define ID_STATE (1 << 2)
|
||||
#define ALL_ALGORITHMS_MASK (ID_LIST | ID_MATRIX | ID_STATE)
|
||||
#define NUM_ALGORITHMS 3
|
||||
|
||||
/* list data structures */
|
||||
typedef struct list_data_s
|
||||
{
|
||||
ee_s16 data16;
|
||||
ee_s16 idx;
|
||||
} list_data;
|
||||
|
||||
/* Singly linked list node: a link to another node plus a pointer to the
   node's list_data_s payload. */
typedef struct list_head_s
{
    struct list_head_s *next; /* link to another list node */
    struct list_data_s *info; /* payload attached to this node */
} list_head;
|
||||
|
||||
/*matrix benchmark related stuff */
|
||||
#define MATDAT_INT 1
|
||||
#if MATDAT_INT
|
||||
typedef ee_s16 MATDAT;
|
||||
typedef ee_s32 MATRES;
|
||||
#else
|
||||
typedef ee_f16 MATDAT;
|
||||
typedef ee_f32 MATRES;
|
||||
#endif
|
||||
|
||||
typedef struct MAT_PARAMS_S
|
||||
{
|
||||
int N;
|
||||
MATDAT *A;
|
||||
MATDAT *B;
|
||||
MATRES *C;
|
||||
} mat_params;
|
||||
|
||||
/* state machine related stuff */
|
||||
/* List of all the possible states for the FSM */
|
||||
/* States of the benchmark's finite state machine. Values are consecutive
   from 0, so NUM_CORE_STATES equals the number of real states. */
typedef enum CORE_STATE
{
    CORE_START      = 0,
    CORE_INVALID    = 1,
    CORE_S1         = 2,
    CORE_S2         = 3,
    CORE_INT        = 4,
    CORE_FLOAT      = 5,
    CORE_EXPONENT   = 6,
    CORE_SCIENTIFIC = 7,
    NUM_CORE_STATES = 8 /* count of states above, not a state itself */
} core_state_e;
|
||||
|
||||
/* Helper structure to hold results */
|
||||
typedef struct RESULTS_S
|
||||
{
|
||||
/* inputs */
|
||||
ee_s16 seed1; /* Initializing seed */
|
||||
ee_s16 seed2; /* Initializing seed */
|
||||
ee_s16 seed3; /* Initializing seed */
|
||||
void * memblock[4]; /* Pointer to safe memory location */
|
||||
ee_u32 size; /* Size of the data */
|
||||
ee_u32 iterations; /* Number of iterations to execute */
|
||||
ee_u32 execs; /* Bitmask of operations to execute */
|
||||
struct list_head_s *list;
|
||||
mat_params mat;
|
||||
/* outputs */
|
||||
ee_u16 crc;
|
||||
ee_u16 crclist;
|
||||
ee_u16 crcmatrix;
|
||||
ee_u16 crcstate;
|
||||
ee_s16 err;
|
||||
/* ultithread specific */
|
||||
core_portable port;
|
||||
} core_results;
|
||||
|
||||
/* Multicore execution handling */
|
||||
#if (MULTITHREAD > 1)
|
||||
ee_u8 core_start_parallel(core_results *res);
|
||||
ee_u8 core_stop_parallel(core_results *res);
|
||||
#endif
|
||||
|
||||
/* list benchmark functions */
|
||||
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed);
|
||||
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx);
|
||||
|
||||
/* state benchmark functions */
|
||||
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p);
|
||||
ee_u16 core_bench_state(ee_u32 blksize,
|
||||
ee_u8 *memblock,
|
||||
ee_s16 seed1,
|
||||
ee_s16 seed2,
|
||||
ee_s16 step,
|
||||
ee_u16 crc);
|
||||
|
||||
/* matrix benchmark functions */
|
||||
ee_u32 core_init_matrix(ee_u32 blksize,
|
||||
void * memblk,
|
||||
ee_s32 seed,
|
||||
mat_params *p);
|
||||
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc);
|
712
FIRMWARE/COREMARK/ee_printf.c
Normal file
712
FIRMWARE/COREMARK/ee_printf.c
Normal file
@@ -0,0 +1,712 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
#include <io.h>
|
||||
#include "coremark.h"
|
||||
#include <stdarg.h>
|
||||
|
||||
#define ZEROPAD (1 << 0) /* Pad with zero */
|
||||
#define SIGN (1 << 1) /* Unsigned/signed long */
|
||||
#define PLUS (1 << 2) /* Show plus */
|
||||
#define SPACE (1 << 3) /* Spacer */
|
||||
#define LEFT (1 << 4) /* Left justified */
|
||||
#define HEX_PREP (1 << 5) /* 0x */
|
||||
#define UPPERCASE (1 << 6) /* 'ABCDEF' */
|
||||
|
||||
#define is_digit(c) ((c) >= '0' && (c) <= '9')
|
||||
|
||||
static char * digits = "0123456789abcdefghijklmnopqrstuvwxyz";
|
||||
static char * upper_digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
static ee_size_t strnlen(const char *s, ee_size_t count);
|
||||
|
||||
static ee_size_t
|
||||
strnlen(const char *s, ee_size_t count)
|
||||
{
|
||||
const char *sc;
|
||||
for (sc = s; *sc != '\0' && count--; ++sc)
|
||||
;
|
||||
return sc - s;
|
||||
}
|
||||
|
||||
/*
 * Parse the run of decimal digits at *s and return its value.
 * On return *s points at the first character that is not a digit.
 * Returns 0 when *s does not start with a digit. No overflow checking.
 */
static int
skip_atoi(const char **s)
{
    const char *cursor = *s;
    int         value  = 0;

    for (; is_digit(*cursor); cursor++)
        value = value * 10 + *cursor - '0';

    *s = cursor;
    return value;
}
|
||||
|
||||
static char *
|
||||
number(char *str, long num, int base, int size, int precision, int type)
|
||||
{
|
||||
char c, sign, tmp[66];
|
||||
char *dig = digits;
|
||||
int i;
|
||||
|
||||
if (type & UPPERCASE)
|
||||
dig = upper_digits;
|
||||
if (type & LEFT)
|
||||
type &= ~ZEROPAD;
|
||||
if (base < 2 || base > 36)
|
||||
return 0;
|
||||
|
||||
c = (type & ZEROPAD) ? '0' : ' ';
|
||||
sign = 0;
|
||||
if (type & SIGN)
|
||||
{
|
||||
if (num < 0)
|
||||
{
|
||||
sign = '-';
|
||||
num = -num;
|
||||
size--;
|
||||
}
|
||||
else if (type & PLUS)
|
||||
{
|
||||
sign = '+';
|
||||
size--;
|
||||
}
|
||||
else if (type & SPACE)
|
||||
{
|
||||
sign = ' ';
|
||||
size--;
|
||||
}
|
||||
}
|
||||
|
||||
if (type & HEX_PREP)
|
||||
{
|
||||
if (base == 16)
|
||||
size -= 2;
|
||||
else if (base == 8)
|
||||
size--;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
|
||||
if (num == 0)
|
||||
tmp[i++] = '0';
|
||||
else
|
||||
{
|
||||
while (num != 0)
|
||||
{
|
||||
tmp[i++] = dig[((unsigned long)num) % (unsigned)base];
|
||||
num = ((unsigned long)num) / (unsigned)base;
|
||||
}
|
||||
}
|
||||
|
||||
if (i > precision)
|
||||
precision = i;
|
||||
size -= precision;
|
||||
if (!(type & (ZEROPAD | LEFT)))
|
||||
while (size-- > 0)
|
||||
*str++ = ' ';
|
||||
if (sign)
|
||||
*str++ = sign;
|
||||
|
||||
if (type & HEX_PREP)
|
||||
{
|
||||
if (base == 8)
|
||||
*str++ = '0';
|
||||
else if (base == 16)
|
||||
{
|
||||
*str++ = '0';
|
||||
*str++ = digits[33];
|
||||
}
|
||||
}
|
||||
|
||||
if (!(type & LEFT))
|
||||
while (size-- > 0)
|
||||
*str++ = c;
|
||||
while (i < precision--)
|
||||
*str++ = '0';
|
||||
while (i-- > 0)
|
||||
*str++ = tmp[i];
|
||||
while (size-- > 0)
|
||||
*str++ = ' ';
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
static char *
|
||||
eaddr(char *str, unsigned char *addr, int size, int precision, int type)
|
||||
{
|
||||
char tmp[24];
|
||||
char *dig = digits;
|
||||
int i, len;
|
||||
|
||||
if (type & UPPERCASE)
|
||||
dig = upper_digits;
|
||||
len = 0;
|
||||
for (i = 0; i < 6; i++)
|
||||
{
|
||||
if (i != 0)
|
||||
tmp[len++] = ':';
|
||||
tmp[len++] = dig[addr[i] >> 4];
|
||||
tmp[len++] = dig[addr[i] & 0x0F];
|
||||
}
|
||||
|
||||
if (!(type & LEFT))
|
||||
while (len < size--)
|
||||
*str++ = ' ';
|
||||
for (i = 0; i < len; ++i)
|
||||
*str++ = tmp[i];
|
||||
while (len < size--)
|
||||
*str++ = ' ';
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
static char *
|
||||
iaddr(char *str, unsigned char *addr, int size, int precision, int type)
|
||||
{
|
||||
char tmp[24];
|
||||
int i, n, len;
|
||||
|
||||
len = 0;
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (i != 0)
|
||||
tmp[len++] = '.';
|
||||
n = addr[i];
|
||||
|
||||
if (n == 0)
|
||||
tmp[len++] = digits[0];
|
||||
else
|
||||
{
|
||||
if (n >= 100)
|
||||
{
|
||||
tmp[len++] = digits[n / 100];
|
||||
n = n % 100;
|
||||
tmp[len++] = digits[n / 10];
|
||||
n = n % 10;
|
||||
}
|
||||
else if (n >= 10)
|
||||
{
|
||||
tmp[len++] = digits[n / 10];
|
||||
n = n % 10;
|
||||
}
|
||||
|
||||
tmp[len++] = digits[n];
|
||||
}
|
||||
}
|
||||
|
||||
if (!(type & LEFT))
|
||||
while (len < size--)
|
||||
*str++ = ' ';
|
||||
for (i = 0; i < len; ++i)
|
||||
*str++ = tmp[i];
|
||||
while (len < size--)
|
||||
*str++ = ' ';
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
#if HAS_FLOAT
|
||||
|
||||
char * ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf);
|
||||
char * fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf);
|
||||
static void ee_bufcpy(char *d, char *s, int count);
|
||||
|
||||
/*
 * Copy `count` bytes from `ps` to `pd` (front to back, no terminator added).
 * A zero or negative `count` copies nothing; previously a negative count made
 * the end pointer lie before the start and the copy loop never terminated.
 */
void
ee_bufcpy(char *pd, char *ps, int count)
{
    int i;

    for (i = 0; i < count; i++)
        pd[i] = ps[i];
}
|
||||
|
||||
/*
 * Convert `value` to text in `buffer` (NUL-terminated) using format `fmt`:
 *   'e'/'E'  d.ddde+XXX with `precision` fraction digits and a three-digit
 *            exponent
 *   'f'      fixed notation with `precision` fraction digits
 *   'g'/'G'  'e' or 'f' style chosen from the decimal exponent
 * Any other `fmt` produces an empty string. Digit generation is delegated to
 * ecvtbuf()/fcvtbuf(). No bounds checking is done on `buffer`.
 */
static void
parse_float(double value, char *buffer, char fmt, int precision)
{
    int   decpt, sign, exp, pos;
    char *digits = NULL;
    char  cvtbuf[80];
    int   capexp = 0;
    int   magnitude;

    /* Upper-case variants only differ in the exponent letter. */
    if (fmt == 'G' || fmt == 'E')
    {
        capexp = 1;
        fmt += 'a' - 'A';
    }

    if (fmt == 'g')
    {
        /* For 'g' a precision of zero means one significant digit. Without
           this the adjustments below would drive precision negative and the
           'e' path would copy a negative number of digits. */
        if (precision == 0)
            precision = 1;

        digits    = ecvtbuf(value, precision, &decpt, &sign, cvtbuf);
        magnitude = decpt - 1;
        if (magnitude < -4 || magnitude > precision - 1)
        {
            fmt = 'e';
            precision -= 1; /* one significant digit sits before the '.' */
        }
        else
        {
            fmt = 'f';
            precision -= decpt; /* digits left for the fraction part */
        }
    }

    if (fmt == 'e')
    {
        digits = ecvtbuf(value, precision + 1, &decpt, &sign, cvtbuf);

        if (sign)
            *buffer++ = '-';
        *buffer++ = *digits;
        if (precision > 0)
            *buffer++ = '.';
        ee_bufcpy(buffer, digits + 1, precision);
        buffer += precision;
        *buffer++ = capexp ? 'E' : 'e';

        if (decpt == 0)
        {
            if (value == 0.0)
                exp = 0;
            else
                exp = -1;
        }
        else
            exp = decpt - 1;

        if (exp < 0)
        {
            *buffer++ = '-';
            exp       = -exp;
        }
        else
            *buffer++ = '+';

        /* Exponent is always written as exactly three decimal digits. */
        buffer[2] = (exp % 10) + '0';
        exp       = exp / 10;
        buffer[1] = (exp % 10) + '0';
        exp       = exp / 10;
        buffer[0] = (exp % 10) + '0';
        buffer += 3;
    }
    else if (fmt == 'f')
    {
        digits = fcvtbuf(value, precision, &decpt, &sign, cvtbuf);
        if (sign)
            *buffer++ = '-';
        if (*digits)
        {
            if (decpt <= 0)
            {
                /* Pure fraction: "0." followed by -decpt zeros. */
                *buffer++ = '0';
                *buffer++ = '.';
                for (pos = 0; pos < -decpt; pos++)
                    *buffer++ = '0';
                while (*digits)
                    *buffer++ = *digits++;
            }
            else
            {
                /* Insert the '.' after decpt integer digits. */
                pos = 0;
                while (*digits)
                {
                    if (pos++ == decpt)
                        *buffer++ = '.';
                    *buffer++ = *digits++;
                }
            }
        }
        else
        {
            /* No digits returned: print zero with the requested precision. */
            *buffer++ = '0';
            if (precision > 0)
            {
                *buffer++ = '.';
                for (pos = 0; pos < precision; pos++)
                    *buffer++ = '0';
            }
        }
    }

    *buffer = '\0';
}
|
||||
|
||||
/*
 * Make sure the number text in `buffer` contains a decimal point.
 *
 * If a '.' is found before any exponent letter, nothing changes. Otherwise
 * a '.' is inserted directly in front of the 'e'/'E', or appended when there
 * is no exponent. The buffer must have room for one extra character.
 */
static void
decimal_point(char *buffer)
{
    while (*buffer)
    {
        if (*buffer == '.')
            return;
        if (*buffer == 'e' || *buffer == 'E')
            break;
        buffer++;
    }

    if (*buffer)
    {
        /* buffer points at the exponent letter: shift the whole tail,
           including the exponent letter itself (index 0) and the
           terminator, one place to the right before writing the '.'.
           Stopping at index 1 would overwrite and lose the exponent letter. */
        int n = 0;

        while (buffer[n] != '\0')
            n++;
        while (n >= 0)
        {
            buffer[n + 1] = buffer[n];
            n--;
        }

        *buffer = '.';
    }
    else
    {
        *buffer++ = '.';
        *buffer   = '\0';
    }
}
|
||||
|
||||
/*
 * Remove trailing zeros from the fraction part of the number text in
 * `buffer`, and the '.' itself when no fraction digits remain. An exponent
 * suffix ("e+NNN" / "E+NNN"), if present, is preserved and moved up against
 * the remaining digits. Text without a '.' is left unchanged.
 */
static void
cropzeros(char *buffer)
{
    char *stop;

    /* Find the decimal point. */
    while (*buffer && *buffer != '.')
        buffer++;
    if (*buffer++)
    {
        /* Find the end of the fraction: exponent letter or terminator. */
        while (*buffer && *buffer != 'e' && *buffer != 'E')
            buffer++;
        stop = buffer--;
        /* Walk back over trailing zeros and a then-dangling '.'. */
        while (*buffer == '0')
            buffer--;
        if (*buffer == '.')
            buffer--;
        /* Copy the tail (exponent, if any, plus the terminator) down over
           the cropped characters. Zero-filling here instead would also wipe
           the exponent. */
        while ((*++buffer = *stop++) != '\0')
            ;
    }
}
|
||||
|
||||
static char *
|
||||
flt(char *str, double num, int size, int precision, char fmt, int flags)
|
||||
{
|
||||
char tmp[80];
|
||||
char c, sign;
|
||||
int n, i;
|
||||
|
||||
// Left align means no zero padding
|
||||
if (flags & LEFT)
|
||||
flags &= ~ZEROPAD;
|
||||
|
||||
// Determine padding and sign char
|
||||
c = (flags & ZEROPAD) ? '0' : ' ';
|
||||
sign = 0;
|
||||
if (flags & SIGN)
|
||||
{
|
||||
if (num < 0.0)
|
||||
{
|
||||
sign = '-';
|
||||
num = -num;
|
||||
size--;
|
||||
}
|
||||
else if (flags & PLUS)
|
||||
{
|
||||
sign = '+';
|
||||
size--;
|
||||
}
|
||||
else if (flags & SPACE)
|
||||
{
|
||||
sign = ' ';
|
||||
size--;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the precision value
|
||||
if (precision < 0)
|
||||
precision = 6; // Default precision: 6
|
||||
|
||||
// Convert floating point number to text
|
||||
parse_float(num, tmp, fmt, precision);
|
||||
|
||||
if ((flags & HEX_PREP) && precision == 0)
|
||||
decimal_point(tmp);
|
||||
if (fmt == 'g' && !(flags & HEX_PREP))
|
||||
cropzeros(tmp);
|
||||
|
||||
n = strnlen(tmp, 256);
|
||||
|
||||
// Output number with alignment and padding
|
||||
size -= n;
|
||||
if (!(flags & (ZEROPAD | LEFT)))
|
||||
while (size-- > 0)
|
||||
*str++ = ' ';
|
||||
if (sign)
|
||||
*str++ = sign;
|
||||
if (!(flags & LEFT))
|
||||
while (size-- > 0)
|
||||
*str++ = c;
|
||||
for (i = 0; i < n; i++)
|
||||
*str++ = tmp[i];
|
||||
while (size-- > 0)
|
||||
*str++ = ' ';
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static int
|
||||
ee_vsprintf(char *buf, const char *fmt, va_list args)
|
||||
{
|
||||
int len;
|
||||
unsigned long num;
|
||||
int i, base;
|
||||
char * str;
|
||||
char * s;
|
||||
|
||||
int flags; // Flags to number()
|
||||
|
||||
int field_width; // Width of output field
|
||||
int precision; // Min. # of digits for integers; max number of chars for
|
||||
// from string
|
||||
int qualifier; // 'h', 'l', or 'L' for integer fields
|
||||
|
||||
for (str = buf; *fmt; fmt++)
|
||||
{
|
||||
if (*fmt != '%')
|
||||
{
|
||||
*str++ = *fmt;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Process flags
|
||||
flags = 0;
|
||||
repeat:
|
||||
fmt++; // This also skips first '%'
|
||||
switch (*fmt)
|
||||
{
|
||||
case '-':
|
||||
flags |= LEFT;
|
||||
goto repeat;
|
||||
case '+':
|
||||
flags |= PLUS;
|
||||
goto repeat;
|
||||
case ' ':
|
||||
flags |= SPACE;
|
||||
goto repeat;
|
||||
case '#':
|
||||
flags |= HEX_PREP;
|
||||
goto repeat;
|
||||
case '0':
|
||||
flags |= ZEROPAD;
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
// Get field width
|
||||
field_width = -1;
|
||||
if (is_digit(*fmt))
|
||||
field_width = skip_atoi(&fmt);
|
||||
else if (*fmt == '*')
|
||||
{
|
||||
fmt++;
|
||||
field_width = va_arg(args, int);
|
||||
if (field_width < 0)
|
||||
{
|
||||
field_width = -field_width;
|
||||
flags |= LEFT;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the precision
|
||||
precision = -1;
|
||||
if (*fmt == '.')
|
||||
{
|
||||
++fmt;
|
||||
if (is_digit(*fmt))
|
||||
precision = skip_atoi(&fmt);
|
||||
else if (*fmt == '*')
|
||||
{
|
||||
++fmt;
|
||||
precision = va_arg(args, int);
|
||||
}
|
||||
if (precision < 0)
|
||||
precision = 0;
|
||||
}
|
||||
|
||||
// Get the conversion qualifier
|
||||
qualifier = -1;
|
||||
if (*fmt == 'l' || *fmt == 'L')
|
||||
{
|
||||
qualifier = *fmt;
|
||||
fmt++;
|
||||
}
|
||||
|
||||
// Default base
|
||||
base = 10;
|
||||
|
||||
switch (*fmt)
|
||||
{
|
||||
case 'c':
|
||||
if (!(flags & LEFT))
|
||||
while (--field_width > 0)
|
||||
*str++ = ' ';
|
||||
*str++ = (unsigned char)va_arg(args, int);
|
||||
while (--field_width > 0)
|
||||
*str++ = ' ';
|
||||
continue;
|
||||
|
||||
case 's':
|
||||
s = va_arg(args, char *);
|
||||
if (!s)
|
||||
s = "<NULL>";
|
||||
len = strnlen(s, precision);
|
||||
if (!(flags & LEFT))
|
||||
while (len < field_width--)
|
||||
*str++ = ' ';
|
||||
for (i = 0; i < len; ++i)
|
||||
*str++ = *s++;
|
||||
while (len < field_width--)
|
||||
*str++ = ' ';
|
||||
continue;
|
||||
|
||||
case 'p':
|
||||
if (field_width == -1)
|
||||
{
|
||||
field_width = 2 * sizeof(void *);
|
||||
flags |= ZEROPAD;
|
||||
}
|
||||
str = number(str,
|
||||
(unsigned long)va_arg(args, void *),
|
||||
16,
|
||||
field_width,
|
||||
precision,
|
||||
flags);
|
||||
continue;
|
||||
|
||||
case 'A':
|
||||
flags |= UPPERCASE;
|
||||
|
||||
case 'a':
|
||||
if (qualifier == 'l')
|
||||
str = eaddr(str,
|
||||
va_arg(args, unsigned char *),
|
||||
field_width,
|
||||
precision,
|
||||
flags);
|
||||
else
|
||||
str = iaddr(str,
|
||||
va_arg(args, unsigned char *),
|
||||
field_width,
|
||||
precision,
|
||||
flags);
|
||||
continue;
|
||||
|
||||
// Integer number formats - set up the flags and "break"
|
||||
case 'o':
|
||||
base = 8;
|
||||
break;
|
||||
|
||||
case 'X':
|
||||
flags |= UPPERCASE;
|
||||
|
||||
case 'x':
|
||||
base = 16;
|
||||
break;
|
||||
|
||||
case 'd':
|
||||
case 'i':
|
||||
flags |= SIGN;
|
||||
|
||||
case 'u':
|
||||
break;
|
||||
|
||||
#if HAS_FLOAT
|
||||
|
||||
case 'f':
|
||||
str = flt(str,
|
||||
va_arg(args, double),
|
||||
field_width,
|
||||
precision,
|
||||
*fmt,
|
||||
flags | SIGN);
|
||||
continue;
|
||||
|
||||
#endif
|
||||
|
||||
default:
|
||||
if (*fmt != '%')
|
||||
*str++ = '%';
|
||||
if (*fmt)
|
||||
*str++ = *fmt;
|
||||
else
|
||||
--fmt;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (qualifier == 'l')
|
||||
num = va_arg(args, unsigned long);
|
||||
else if (flags & SIGN)
|
||||
num = va_arg(args, int);
|
||||
else
|
||||
num = va_arg(args, unsigned int);
|
||||
|
||||
str = number(str, num, base, field_width, precision, flags);
|
||||
}
|
||||
|
||||
*str = '\0';
|
||||
return str - buf;
|
||||
}
|
||||
|
||||
/*
 * Output hook used by ee_printf(): emits one character by handing it to
 * putchar().
 */
void
uart_send_char(char c)
{
    (void)putchar(c);

    /* Disabled alternative kept for reference: drive the UART registers
       directly and send '\r' before every '\n'.

        if(c=='\n')
        {
            while(io.uart.stat&1); // uart busy, wait...
            io.uart.fifo = '\r';
        }

        while(io.uart.stat&1); // uart busy, wait...
        io.uart.fifo = c;
    */
    // #error "You must implement the method uart_send_char to use this file!\n";
    /* Output of a char to a UART usually follows the following model:
        Wait until UART is ready
        Write char to UART
        Wait until UART is done

        Or in code:
        while (*UART_CONTROL_ADDRESS != UART_READY);
        *UART_DATA_ADDRESS = c;
        while (*UART_CONTROL_ADDRESS != UART_READY);

        Check the UART sample code on your platform or the board
        documentation.
    */
}
|
||||
|
||||
/*
 * printf replacement: format into a 1024-byte stack buffer with
 * ee_vsprintf(), then push the result out one character at a time through
 * uart_send_char(). Returns the number of characters sent. Output stops at
 * the first NUL in the buffer; the formatted text must fit in the buffer.
 */
int
ee_printf(const char *fmt, ...)
{
    char    buf[1024];
    char *  p;
    int     n = 0;
    va_list args;

    va_start(args, fmt);
    ee_vsprintf(buf, fmt, args);
    va_end(args);

    for (p = buf; *p != '\0'; p++, n++)
        uart_send_char(*p);

    return n;
}
|
425
FIRMWARE/DHRYSTONE/dhry.h
Normal file
425
FIRMWARE/DHRYSTONE/dhry.h
Normal file
@@ -0,0 +1,425 @@
|
||||
/*
|
||||
****************************************************************************
|
||||
*
|
||||
* "DHRYSTONE" Benchmark Program
|
||||
* -----------------------------
|
||||
*
|
||||
* Version: C, Version 2.1
|
||||
*
|
||||
* File: dhry.h (part 1 of 3)
|
||||
*
|
||||
* Date: May 25, 1988
|
||||
*
|
||||
* Author: Reinhold P. Weicker
|
||||
* Siemens AG, AUT E 51
|
||||
* Postfach 3220
|
||||
* 8520 Erlangen
|
||||
* Germany (West)
|
||||
* Phone: [+49]-9131-7-20330
|
||||
* (8-17 Central European Time)
|
||||
* Usenet: ..!mcsun!unido!estevax!weicker
|
||||
*
|
||||
* Original Version (in Ada) published in
|
||||
* "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
|
||||
* pp. 1013 - 1030, together with the statistics
|
||||
* on which the distribution of statements etc. is based.
|
||||
*
|
||||
* In this C version, the following C library functions are used:
|
||||
* - strcpy, strcmp (inside the measurement loop)
|
||||
* - printf, scanf (outside the measurement loop)
|
||||
* In addition, Berkeley UNIX system calls "times ()" or "time ()"
|
||||
* are used for execution time measurement. For measurements
|
||||
* on other systems, these calls have to be changed.
|
||||
*
|
||||
* Collection of Results:
|
||||
* Reinhold Weicker (address see above) and
|
||||
*
|
||||
* Rick Richardson
|
||||
* PC Research. Inc.
|
||||
* 94 Apple Orchard Drive
|
||||
* Tinton Falls, NJ 07724
|
||||
* Phone: (201) 389-8963 (9-17 EST)
|
||||
* Usenet: ...!uunet!pcrat!rick
|
||||
*
|
||||
* Please send results to Rick Richardson and/or Reinhold Weicker.
|
||||
* Complete information should be given on hardware and software used.
|
||||
* Hardware information includes: Machine type, CPU, type and size
|
||||
* of caches; for microprocessors: clock frequency, memory speed
|
||||
* (number of wait states).
|
||||
* Software information includes: Compiler (and runtime library)
|
||||
* manufacturer and version, compilation switches, OS version.
|
||||
* The Operating System version may give an indication about the
|
||||
* compiler; Dhrystone itself performs no OS calls in the measurement loop.
|
||||
*
|
||||
* The complete output generated by the program should be mailed
|
||||
* such that at least some checks for correctness can be made.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* History: This version C/2.1 has been made for two reasons:
|
||||
*
|
||||
* 1) There is an obvious need for a common C version of
|
||||
* Dhrystone, since C is at present the most popular system
|
||||
* programming language for the class of processors
|
||||
* (microcomputers, minicomputers) where Dhrystone is used most.
|
||||
* There should be, as far as possible, only one C version of
|
||||
* Dhrystone such that results can be compared without
|
||||
* restrictions. In the past, the C versions distributed
|
||||
* by Rick Richardson (Version 1.1) and by Reinhold Weicker
|
||||
* had small (though not significant) differences.
|
||||
*
|
||||
* 2) As far as it is possible without changes to the Dhrystone
|
||||
* statistics, optimizing compilers should be prevented from
|
||||
* removing significant statements.
|
||||
*
|
||||
* This C version has been developed in cooperation with
|
||||
* Rick Richardson (Tinton Falls, NJ), it incorporates many
|
||||
* ideas from the "Version 1.1" distributed previously by
|
||||
* him over the UNIX network Usenet.
|
||||
* I also thank Chaim Benedelac (National Semiconductor),
|
||||
* David Ditzel (SUN), Earl Killian and John Mashey (MIPS),
|
||||
* Alan Smith and Rafael Saavedra-Barrera (UC at Berkeley)
|
||||
* for their help with comments on earlier versions of the
|
||||
* benchmark.
|
||||
*
|
||||
* Changes: In the initialization part, this version follows mostly
|
||||
* Rick Richardson's version distributed via Usenet, not the
|
||||
* version distributed earlier via floppy disk by Reinhold Weicker.
|
||||
* As a concession to older compilers, names have been made
|
||||
* unique within the first 8 characters.
|
||||
* Inside the measurement loop, this version follows the
|
||||
* version previously distributed by Reinhold Weicker.
|
||||
*
|
||||
* At several places in the benchmark, code has been added,
|
||||
* but within the measurement loop only in branches that
|
||||
* are not executed. The intention is that optimizing compilers
|
||||
* should be prevented from moving code out of the measurement
|
||||
* loop, or from removing code altogether. Since the statements
|
||||
* that are executed within the measurement loop have NOT been
|
||||
* changed, the numbers defining the "Dhrystone distribution"
|
||||
* (distribution of statements, operand types and locality)
|
||||
* still hold. Except for sophisticated optimizing compilers,
|
||||
* execution times for this version should be the same as
|
||||
* for previous versions.
|
||||
*
|
||||
* Since it has proven difficult to subtract the time for the
|
||||
* measurement loop overhead in a correct way, the loop check
|
||||
* has been made a part of the benchmark. This does have
|
||||
* an impact - though a very minor one - on the distribution
|
||||
* statistics which have been updated for this version.
|
||||
*
|
||||
* All changes within the measurement loop are described
|
||||
* and discussed in the companion paper "Rationale for
|
||||
* Dhrystone version 2".
|
||||
*
|
||||
* Because of the self-imposed limitation that the order and
|
||||
* distribution of the executed statements should not be
|
||||
* changed, there are still cases where optimizing compilers
|
||||
* may not generate code for some statements. To a certain
|
||||
* degree, this is unavoidable for small synthetic benchmarks.
|
||||
* Users of the benchmark are advised to check code listings
|
||||
* whether code is generated for all statements of Dhrystone.
|
||||
*
|
||||
* Version 2.1 is identical to version 2.0 distributed via
|
||||
* the UNIX network Usenet in March 1988 except that it corrects
|
||||
* some minor deficiencies that were found by users of version 2.0.
|
||||
* The only change within the measurement loop is that a
|
||||
* non-executed "else" part was added to the "if" statement in
|
||||
* Func_3, and a non-executed "else" part removed from Proc_3.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Defines: The following "Defines" are possible:
|
||||
* -DREG=register (default: Not defined)
|
||||
* As an approximation to what an average C programmer
|
||||
* might do, the "register" storage class is applied
|
||||
* (if enabled by -DREG=register)
|
||||
* - for local variables, if they are used (dynamically)
|
||||
* five or more times
|
||||
* - for parameters if they are used (dynamically)
|
||||
* six or more times
|
||||
* Note that an optimal "register" strategy is
|
||||
* compiler-dependent, and that "register" declarations
|
||||
* do not necessarily lead to faster execution.
|
||||
* -DNOSTRUCTASSIGN (default: Not defined)
|
||||
* Define if the C compiler does not support
|
||||
* assignment of structures.
|
||||
* -DNOENUMS (default: Not defined)
|
||||
* Define if the C compiler does not support
|
||||
* enumeration types.
|
||||
* -DTIMES (default)
|
||||
* -DTIME
|
||||
* The "times" function of UNIX (returning process times)
|
||||
* or the "time" function (returning wallclock time)
|
||||
* is used for measurement.
|
||||
* For single user machines, "time ()" is adequate. For
|
||||
* multi-user machines where you cannot get single-user
|
||||
* access, use the "times ()" function. If you have
|
||||
* neither, use a stopwatch in the dead of night.
|
||||
* "printf"s are provided marking the points "Start Timer"
|
||||
* and "Stop Timer". DO NOT use the UNIX "time(1)"
|
||||
* command, as this will measure the total time to
|
||||
* run this program, which will (erroneously) include
|
||||
* the time to allocate storage (malloc) and to perform
|
||||
* the initialization.
|
||||
* -DHZ=nnn
|
||||
* In Berkeley UNIX, the function "times" returns process
|
||||
* time in 1/HZ seconds, with HZ = 60 for most systems.
|
||||
* CHECK YOUR SYSTEM DESCRIPTION BEFORE YOU JUST APPLY
|
||||
* A VALUE.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Compilation model and measurement (IMPORTANT):
|
||||
*
|
||||
* This C version of Dhrystone consists of three files:
|
||||
* - dhry.h (this file, containing global definitions and comments)
|
||||
* - dhry_1.c (containing the code corresponding to Ada package Pack_1)
|
||||
* - dhry_2.c (containing the code corresponding to Ada package Pack_2)
|
||||
*
|
||||
* The following "ground rules" apply for measurements:
|
||||
* - Separate compilation
|
||||
* - No procedure merging
|
||||
* - Otherwise, compiler optimizations are allowed but should be indicated
|
||||
* - Default results are those without register declarations
|
||||
* See the companion paper "Rationale for Dhrystone Version 2" for a more
|
||||
* detailed discussion of these ground rules.
|
||||
*
|
||||
* For 16-Bit processors (e.g. 80186, 80286), times for all compilation
|
||||
* models ("small", "medium", "large" etc.) should be given if possible,
|
||||
* together with a definition of these models for the compiler system used.
|
||||
*
|
||||
**************************************************************************
|
||||
*
|
||||
* Dhrystone (C version) statistics:
|
||||
*
|
||||
* [Comment from the first distribution, updated for version 2.
|
||||
* Note that because of language differences, the numbers are slightly
|
||||
* different from the Ada version.]
|
||||
*
|
||||
* The following program contains statements of a high level programming
|
||||
* language (here: C) in a distribution considered representative:
|
||||
*
|
||||
* assignments 52 (51.0 %)
|
||||
* control statements 33 (32.4 %)
|
||||
* procedure, function calls 17 (16.7 %)
|
||||
*
|
||||
* 103 statements are dynamically executed. The program is balanced with
|
||||
* respect to the three aspects:
|
||||
*
|
||||
* - statement type
|
||||
* - operand type
|
||||
* - operand locality
|
||||
* operand global, local, parameter, or constant.
|
||||
*
|
||||
* The combination of these three aspects is balanced only approximately.
|
||||
*
|
||||
* 1. Statement Type:
|
||||
* ----------------- number
|
||||
*
|
||||
* V1 = V2 9
|
||||
* (incl. V1 = F(..)
|
||||
* V = Constant 12
|
||||
* Assignment, 7
|
||||
* with array element
|
||||
* Assignment, 6
|
||||
* with record component
|
||||
* --
|
||||
* 34 34
|
||||
*
|
||||
* X = Y +|-|"&&"|"|" Z 5
|
||||
* X = Y +|-|"==" Constant 6
|
||||
* X = X +|- 1 3
|
||||
* X = Y *|/ Z 2
|
||||
* X = Expression, 1
|
||||
* two operators
|
||||
* X = Expression, 1
|
||||
* three operators
|
||||
* --
|
||||
* 18 18
|
||||
*
|
||||
* if .... 14
|
||||
* with "else" 7
|
||||
* without "else" 7
|
||||
* executed 3
|
||||
* not executed 4
|
||||
* for ... 7 | counted every time
|
||||
* while ... 4 | the loop condition
|
||||
* do ... while 1 | is evaluated
|
||||
* switch ... 1
|
||||
* break 1
|
||||
* declaration with 1
|
||||
* initialization
|
||||
* --
|
||||
* 34 34
|
||||
*
|
||||
* P (...) procedure call 11
|
||||
* user procedure 10
|
||||
* library procedure 1
|
||||
* X = F (...)
|
||||
* function call 6
|
||||
* user function 5
|
||||
* library function 1
|
||||
* --
|
||||
* 17 17
|
||||
* ---
|
||||
* 103
|
||||
*
|
||||
* The average number of parameters in procedure or function calls
|
||||
* is 1.82 (not counting the function values as implicit parameters).
|
||||
*
|
||||
*
|
||||
* 2. Operators
|
||||
* ------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* Arithmetic 32 50.8
|
||||
*
|
||||
* + 21 33.3
|
||||
* - 7 11.1
|
||||
* * 3 4.8
|
||||
* / (int div) 1 1.6
|
||||
*
|
||||
* Comparison 27 42.8
|
||||
*
|
||||
* == 9 14.3
|
||||
* /= 4 6.3
|
||||
* > 1 1.6
|
||||
* < 3 4.8
|
||||
* >= 1 1.6
|
||||
* <= 9 14.3
|
||||
*
|
||||
* Logic 4 6.3
|
||||
*
|
||||
* && (AND-THEN) 1 1.6
|
||||
* | (OR) 1 1.6
|
||||
* ! (NOT) 2 3.2
|
||||
*
|
||||
* -- -----
|
||||
* 63 100.1
|
||||
*
|
||||
*
|
||||
* 3. Operand Type (counted once per operand reference):
|
||||
* ---------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* Integer 175 72.3 %
|
||||
* Character 45 18.6 %
|
||||
* Pointer 12 5.0 %
|
||||
* String30 6 2.5 %
|
||||
* Array 2 0.8 %
|
||||
* Record 2 0.8 %
|
||||
* --- -------
|
||||
* 242 100.0 %
|
||||
*
|
||||
* When there is an access path leading to the final operand (e.g. a record
|
||||
* component), only the final data type on the access path is counted.
|
||||
*
|
||||
*
|
||||
* 4. Operand Locality:
|
||||
* -------------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* local variable 114 47.1 %
|
||||
* global variable 22 9.1 %
|
||||
* parameter 45 18.6 %
|
||||
* value 23 9.5 %
|
||||
* reference 22 9.1 %
|
||||
* function result 6 2.5 %
|
||||
* constant 55 22.7 %
|
||||
* --- -------
|
||||
* 242 100.0 %
|
||||
*
|
||||
*
|
||||
* The program does not compute anything meaningful, but it is syntactically
|
||||
* and semantically correct. All variables have a value assigned to them
|
||||
* before they are used as a source operand.
|
||||
*
|
||||
* There has been no explicit effort to account for the effects of a
|
||||
* cache, or to balance the use of long or short displacements for code or
|
||||
* data.
|
||||
*
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/* Compiler and system dependent definitions: */
|
||||
|
||||
#ifndef TIME
|
||||
#define TIMES
|
||||
#endif
|
||||
/* Use times(2) time function unless */
|
||||
/* explicitly defined otherwise */
|
||||
|
||||
#ifdef TIMES
|
||||
#include <sys/types.h>
|
||||
#include <sys/times.h>
|
||||
/* for "times" */
|
||||
#endif
|
||||
|
||||
#define Mic_secs_Per_Second 80000000.0
|
||||
/* Berkeley UNIX C returns process times in seconds/HZ */
|
||||
|
||||
#ifdef NOSTRUCTASSIGN
|
||||
#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
|
||||
#else
|
||||
#define structassign(d, s) d = s
|
||||
#endif
|
||||
|
||||
#ifdef NOENUM
|
||||
#define Ident_1 0
|
||||
#define Ident_2 1
|
||||
#define Ident_3 2
|
||||
#define Ident_4 3
|
||||
#define Ident_5 4
|
||||
typedef int Enumeration;
|
||||
#else
|
||||
typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
|
||||
Enumeration;
|
||||
#endif
|
||||
/* for boolean and enumeration types in Ada, Pascal */
|
||||
|
||||
/* General definitions: */
|
||||
|
||||
//#include <stdio.h>
|
||||
/* for strcpy, strcmp */
|
||||
|
||||
#define Null 0
|
||||
/* Value of a Null pointer */
|
||||
#define true 1
|
||||
#define false 0
|
||||
|
||||
typedef int One_Thirty;
|
||||
typedef int One_Fifty;
|
||||
typedef char Capital_Letter;
|
||||
typedef int Boolean;
|
||||
typedef char Str_30 [31];
|
||||
typedef int Arr_1_Dim [50];
|
||||
typedef int Arr_2_Dim [50] [50];
|
||||
|
||||
typedef struct record
|
||||
{
|
||||
struct record *Ptr_Comp;
|
||||
Enumeration Discr;
|
||||
union {
|
||||
struct {
|
||||
Enumeration Enum_Comp;
|
||||
int Int_Comp;
|
||||
char Str_Comp [31];
|
||||
} var_1;
|
||||
struct {
|
||||
Enumeration E_Comp_2;
|
||||
char Str_2_Comp [31];
|
||||
} var_2;
|
||||
struct {
|
||||
char Ch_1_Comp;
|
||||
char Ch_2_Comp;
|
||||
} var_3;
|
||||
} variant;
|
||||
} Rec_Type, *Rec_Pointer;
|
||||
|
||||
|
384
FIRMWARE/DHRYSTONE/dhry_1.c
Normal file
384
FIRMWARE/DHRYSTONE/dhry_1.c
Normal file
@@ -0,0 +1,384 @@
|
||||
/*
|
||||
****************************************************************************
|
||||
*
|
||||
* "DHRYSTONE" Benchmark Program
|
||||
* -----------------------------
|
||||
*
|
||||
* Version: C, Version 2.1
|
||||
*
|
||||
* File: dhry_1.c (part 2 of 3)
|
||||
*
|
||||
* Date: May 25, 1988
|
||||
*
|
||||
* Author: Reinhold P. Weicker
|
||||
*
|
||||
****************************************************************************
|
||||
*/
|
||||
|
||||
#include "dhry.h"
|
||||
#include <stdint.h>
|
||||
|
||||
/* Global Variables: */
|
||||
|
||||
Rec_Pointer Ptr_Glob,
|
||||
Next_Ptr_Glob;
|
||||
int Int_Glob;
|
||||
Boolean Bool_Glob;
|
||||
char Ch_1_Glob,
|
||||
Ch_2_Glob;
|
||||
int Arr_1_Glob [50];
|
||||
int Arr_2_Glob [50] [50];
|
||||
|
||||
Enumeration Func_1 ();
|
||||
/* forward declaration necessary since Enumeration may not simply be int */
|
||||
|
||||
#ifndef REG
|
||||
Boolean Reg = false;
|
||||
#define REG
|
||||
/* REG becomes defined as empty */
|
||||
/* i.e. no register variables */
|
||||
#else
|
||||
Boolean Reg = true;
|
||||
#endif
|
||||
|
||||
/* variables for time measurement: */
|
||||
extern uint64_t rdcycle();
|
||||
extern uint64_t rdinstret();
|
||||
uint64_t Begin_Time,
|
||||
End_Time,
|
||||
User_Time;
|
||||
uint64_t Begin_Insn,
|
||||
End_Insn,
|
||||
User_Insn;
|
||||
/* end of variables for time measurement */
|
||||
|
||||
|
||||
main ()
|
||||
/*****/
|
||||
|
||||
/* main program, corresponds to procedures */
|
||||
/* Main and Proc_0 in the Ada version */
|
||||
{
|
||||
One_Fifty Int_1_Loc;
|
||||
REG One_Fifty Int_2_Loc;
|
||||
One_Fifty Int_3_Loc;
|
||||
REG char Ch_Index;
|
||||
Enumeration Enum_Loc;
|
||||
Str_30 Str_1_Loc;
|
||||
Str_30 Str_2_Loc;
|
||||
REG int Run_Index;
|
||||
REG int Number_Of_Runs;
|
||||
|
||||
Rec_Type R1,R2;
|
||||
|
||||
/* Initializations */
|
||||
|
||||
|
||||
/*
|
||||
* FEMTOSOC/FEMTORV32 modifications ===========================
|
||||
*/
|
||||
|
||||
/*
|
||||
* Since there are only two calls to malloc(), and that malloc()
|
||||
* is not supported yet by femtosoc lib, I replaced them with
|
||||
* pre-allocated structures.
|
||||
*/
|
||||
Next_Ptr_Glob = &R1; // (Rec_Pointer) malloc (sizeof (Rec_Type));
|
||||
Ptr_Glob = &R2; // (Rec_Pointer) malloc (sizeof (Rec_Type));
|
||||
|
||||
/*
|
||||
* End of FEMTOSOC/FEMTORV32 modifications ======================
|
||||
*/
|
||||
Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
|
||||
Ptr_Glob->Discr = Ident_1;
|
||||
Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
|
||||
Ptr_Glob->variant.var_1.Int_Comp = 40;
|
||||
strcpy (Ptr_Glob->variant.var_1.Str_Comp,
|
||||
"DHRYSTONE PROGRAM, SOME STRING");
|
||||
strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
|
||||
|
||||
Arr_2_Glob [8][7] = 10;
|
||||
/* Was missing in published program. Without this statement, */
|
||||
/* Arr_2_Glob [8][7] would have an undefined value. */
|
||||
/* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
|
||||
/* overflow may occur for this array element. */
|
||||
|
||||
printf ("\n");
|
||||
printf ("Dhrystone Benchmark, Version 2.1 (Language: C)\n");
|
||||
printf ("\n");
|
||||
if (Reg)
|
||||
{
|
||||
printf ("Program compiled with 'register' attribute\n");
|
||||
printf ("\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
printf ("Program compiled without 'register' attribute\n");
|
||||
printf ("\n");
|
||||
}
|
||||
printf ("Please give the number of runs through the benchmark: ");
|
||||
{
|
||||
// int n;
|
||||
// scanf ("%d", &n);
|
||||
Number_Of_Runs = 50000;
|
||||
}
|
||||
printf ("\n");
|
||||
|
||||
printf ("Execution starts, %d runs through Dhrystone\n", Number_Of_Runs);
|
||||
|
||||
/***************/
|
||||
/* Start timer */
|
||||
/***************/
|
||||
|
||||
Begin_Time = rdcycle();
|
||||
Begin_Insn = rdinstret();
|
||||
|
||||
printf(">>> Begin_time=%d\n", (int)Begin_Time);
|
||||
printf(">>> Begin_insn=%d\n", (int)Begin_Insn);
|
||||
|
||||
for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
|
||||
{
|
||||
Proc_5();
|
||||
Proc_4();
|
||||
/* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
|
||||
Int_1_Loc = 2;
|
||||
Int_2_Loc = 3;
|
||||
strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
|
||||
Enum_Loc = Ident_2;
|
||||
Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
|
||||
/* Bool_Glob == 1 */
|
||||
while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
|
||||
{
|
||||
Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
|
||||
/* Int_3_Loc == 7 */
|
||||
Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
|
||||
/* Int_3_Loc == 7 */
|
||||
Int_1_Loc += 1;
|
||||
} /* while */
|
||||
/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
|
||||
Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
|
||||
/* Int_Glob == 5 */
|
||||
Proc_1 (Ptr_Glob);
|
||||
for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
|
||||
/* loop body executed twice */
|
||||
{
|
||||
if (Enum_Loc == Func_1 (Ch_Index, 'C'))
|
||||
/* then, not executed */
|
||||
{
|
||||
Proc_6 (Ident_1, &Enum_Loc);
|
||||
strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
|
||||
Int_2_Loc = Run_Index;
|
||||
Int_Glob = Run_Index;
|
||||
}
|
||||
}
|
||||
/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
|
||||
Int_2_Loc = Int_2_Loc * Int_1_Loc;
|
||||
Int_1_Loc = Int_2_Loc / Int_3_Loc;
|
||||
Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
|
||||
/* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
|
||||
Proc_2 (&Int_1_Loc);
|
||||
/* Int_1_Loc == 5 */
|
||||
|
||||
} /* loop "for Run_Index" */
|
||||
|
||||
/**************/
|
||||
/* Stop timer */
|
||||
/**************/
|
||||
|
||||
End_Time = rdcycle();
|
||||
End_Insn = rdinstret();
|
||||
|
||||
printf ("Execution ends\n");
|
||||
printf ("\n");
|
||||
printf ("Final values of the variables used in the benchmark:\n");
|
||||
printf ("\n");
|
||||
printf ("Int_Glob: %d\n", Int_Glob);
|
||||
printf (" should be: %d\n", 5);
|
||||
printf ("Bool_Glob: %d\n", Bool_Glob);
|
||||
printf (" should be: %d\n", 1);
|
||||
printf ("Ch_1_Glob: %c\n", Ch_1_Glob);
|
||||
printf (" should be: %c\n", 'A');
|
||||
printf ("Ch_2_Glob: %c\n", Ch_2_Glob);
|
||||
printf (" should be: %c\n", 'B');
|
||||
printf ("Arr_1_Glob[8]: %d\n", Arr_1_Glob[8]);
|
||||
printf (" should be: %d\n", 7);
|
||||
printf ("Arr_2_Glob[8][7]: %d\n", Arr_2_Glob[8][7]);
|
||||
printf (" should be: Number_Of_Runs + 10\n");
|
||||
printf ("Ptr_Glob->\n");
|
||||
printf (" Ptr_Comp: %d\n", (int) Ptr_Glob->Ptr_Comp);
|
||||
printf (" should be: (implementation-dependent)\n");
|
||||
printf (" Discr: %d\n", Ptr_Glob->Discr);
|
||||
printf (" should be: %d\n", 0);
|
||||
printf (" Enum_Comp: %d\n", Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
printf (" should be: %d\n", 2);
|
||||
printf (" Int_Comp: %d\n", Ptr_Glob->variant.var_1.Int_Comp);
|
||||
printf (" should be: %d\n", 17);
|
||||
printf (" Str_Comp: %s\n", Ptr_Glob->variant.var_1.Str_Comp);
|
||||
printf (" should be: DHRYSTONE PROGRAM, SOME STRING\n");
|
||||
printf ("Next_Ptr_Glob->\n");
|
||||
printf (" Ptr_Comp: %d\n", (int) Next_Ptr_Glob->Ptr_Comp);
|
||||
printf (" should be: (implementation-dependent), same as above\n");
|
||||
printf (" Discr: %d\n", Next_Ptr_Glob->Discr);
|
||||
printf (" should be: %d\n", 0);
|
||||
printf (" Enum_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
printf (" should be: %d\n", 1);
|
||||
printf (" Int_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
|
||||
printf (" should be: %d\n", 18);
|
||||
printf (" Str_Comp: %s\n",
|
||||
Next_Ptr_Glob->variant.var_1.Str_Comp);
|
||||
printf (" should be: DHRYSTONE PROGRAM, SOME STRING\n");
|
||||
printf ("Int_1_Loc: %d\n", Int_1_Loc);
|
||||
printf (" should be: %d\n", 5);
|
||||
printf ("Int_2_Loc: %d\n", Int_2_Loc);
|
||||
printf (" should be: %d\n", 13);
|
||||
printf ("Int_3_Loc: %d\n", Int_3_Loc);
|
||||
printf (" should be: %d\n", 7);
|
||||
printf ("Enum_Loc: %d\n", Enum_Loc);
|
||||
printf (" should be: %d\n", 1);
|
||||
printf ("Str_1_Loc: %s\n", Str_1_Loc);
|
||||
printf (" should be: DHRYSTONE PROGRAM, 1'ST STRING\n");
|
||||
printf ("Str_2_Loc: %s\n", Str_2_Loc);
|
||||
printf (" should be: DHRYSTONE PROGRAM, 2'ND STRING\n");
|
||||
printf ("\n");
|
||||
|
||||
User_Time = End_Time - Begin_Time;
|
||||
User_Insn = End_Insn - Begin_Insn;
|
||||
|
||||
printf("Number_Of_Runs: %d\n", Number_Of_Runs);
|
||||
printf("User_Time: %d cycles, %d insn\n", (int)User_Time, (int)User_Insn);
|
||||
|
||||
uint64_t Cycles_Per_Instruction_x1000 = (1000 * User_Time) / User_Insn;
|
||||
printf("Cycles_Per_Instruction: %d.%d%d%d\n",
|
||||
(int)( Cycles_Per_Instruction_x1000 / 1000),
|
||||
(int)((Cycles_Per_Instruction_x1000 / 100 ) % 10),
|
||||
(int)((Cycles_Per_Instruction_x1000 / 10 ) % 10),
|
||||
(int)((Cycles_Per_Instruction_x1000 / 1 ) % 10)
|
||||
);
|
||||
|
||||
show_CPI_2();
|
||||
|
||||
uint64_t Dhrystones_Per_Second_Per_MHz = ((uint64_t)Number_Of_Runs * 1000000) / User_Time;
|
||||
printf("Dhrystones_Per_Second_Per_MHz: %d\n", (int)Dhrystones_Per_Second_Per_MHz);
|
||||
|
||||
/*
|
||||
* "Another common representation of the Dhrystone benchmark is the DMIPS (Dhrystone MIPS) obtained
|
||||
* when the Dhrystone score is divided by 1757 (the number of Dhrystones per second obtained on the
|
||||
* VAX 11/780, nominally a 1 MIPS machine)."
|
||||
*/
|
||||
|
||||
int DMIPS_Per_MHz_x1000 = ((uint64_t)1000 * Dhrystones_Per_Second_Per_MHz) / 1757;
|
||||
printf("DMIPS_Per_MHz: %d.%d%d%d\n",
|
||||
(int)(DMIPS_Per_MHz_x1000 / 1000),
|
||||
(int)((DMIPS_Per_MHz_x1000 / 100) % 10),
|
||||
(int)((DMIPS_Per_MHz_x1000 / 10) % 10),
|
||||
(int)((DMIPS_Per_MHz_x1000 / 1) % 10));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
Proc_1 (Ptr_Val_Par)
|
||||
/******************/
|
||||
|
||||
REG Rec_Pointer Ptr_Val_Par;
|
||||
/* executed once */
|
||||
{
|
||||
REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
|
||||
/* == Ptr_Glob_Next */
|
||||
/* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
|
||||
/* corresponds to "rename" in Ada, "with" in Pascal */
|
||||
|
||||
structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob);
|
||||
Ptr_Val_Par->variant.var_1.Int_Comp = 5;
|
||||
Next_Record->variant.var_1.Int_Comp
|
||||
= Ptr_Val_Par->variant.var_1.Int_Comp;
|
||||
Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
|
||||
Proc_3 (&Next_Record->Ptr_Comp);
|
||||
/* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
|
||||
== Ptr_Glob->Ptr_Comp */
|
||||
if (Next_Record->Discr == Ident_1)
|
||||
/* then, executed */
|
||||
{
|
||||
Next_Record->variant.var_1.Int_Comp = 6;
|
||||
Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp,
|
||||
&Next_Record->variant.var_1.Enum_Comp);
|
||||
Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
|
||||
Proc_7 (Next_Record->variant.var_1.Int_Comp, 10,
|
||||
&Next_Record->variant.var_1.Int_Comp);
|
||||
}
|
||||
else /* not executed */
|
||||
structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
|
||||
} /* Proc_1 */
|
||||
|
||||
|
||||
Proc_2 (Int_Par_Ref)
|
||||
/******************/
|
||||
/* executed once */
|
||||
/* *Int_Par_Ref == 1, becomes 4 */
|
||||
|
||||
One_Fifty *Int_Par_Ref;
|
||||
{
|
||||
One_Fifty Int_Loc;
|
||||
Enumeration Enum_Loc;
|
||||
|
||||
Int_Loc = *Int_Par_Ref + 10;
|
||||
do /* executed once */
|
||||
if (Ch_1_Glob == 'A')
|
||||
/* then, executed */
|
||||
{
|
||||
Int_Loc -= 1;
|
||||
*Int_Par_Ref = Int_Loc - Int_Glob;
|
||||
Enum_Loc = Ident_1;
|
||||
} /* if */
|
||||
while (Enum_Loc != Ident_1); /* true */
|
||||
} /* Proc_2 */
|
||||
|
||||
|
||||
Proc_3 (Ptr_Ref_Par)
|
||||
/******************/
|
||||
/* executed once */
|
||||
/* Ptr_Ref_Par becomes Ptr_Glob */
|
||||
|
||||
Rec_Pointer *Ptr_Ref_Par;
|
||||
|
||||
{
|
||||
if (Ptr_Glob != Null)
|
||||
/* then, executed */
|
||||
*Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
|
||||
Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
|
||||
} /* Proc_3 */
|
||||
|
||||
|
||||
Proc_4 () /* without parameters */
|
||||
/*******/
|
||||
/* executed once */
|
||||
{
|
||||
Boolean Bool_Loc;
|
||||
|
||||
Bool_Loc = Ch_1_Glob == 'A';
|
||||
Bool_Glob = Bool_Loc | Bool_Glob;
|
||||
Ch_2_Glob = 'B';
|
||||
} /* Proc_4 */
|
||||
|
||||
|
||||
Proc_5 () /* without parameters */
|
||||
/*******/
|
||||
/* executed once */
|
||||
{
|
||||
Ch_1_Glob = 'A';
|
||||
Bool_Glob = false;
|
||||
} /* Proc_5 */
|
||||
|
||||
|
||||
/* Procedure for the assignment of structures, */
|
||||
/* if the C compiler doesn't support this feature */
|
||||
#ifdef NOSTRUCTASSIGN
|
||||
/*
 * Byte-wise copy of l bytes from s to d, used by structassign() when the
 * compiler cannot assign structures (NOSTRUCTASSIGN).
 * The return type is spelled out as int (implicit int was removed in C99);
 * no value is returned.
 * NOTE(review): this signature differs from the standard library memcpy
 * (void * return, const source, size_t length) - confirm that no libc
 * declaration is visible when NOSTRUCTASSIGN is defined.
 */
int memcpy (d, s, l)
register char   *d;
register char   *s;
register int    l;
{
  while (l--) *d++ = *s++;
}
|
||||
#endif
|
||||
|
||||
|
192
FIRMWARE/DHRYSTONE/dhry_2.c
Normal file
192
FIRMWARE/DHRYSTONE/dhry_2.c
Normal file
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
****************************************************************************
|
||||
*
|
||||
* "DHRYSTONE" Benchmark Program
|
||||
* -----------------------------
|
||||
*
|
||||
* Version: C, Version 2.1
|
||||
*
|
||||
* File: dhry_2.c (part 3 of 3)
|
||||
*
|
||||
* Date: May 25, 1988
|
||||
*
|
||||
* Author: Reinhold P. Weicker
|
||||
*
|
||||
****************************************************************************
|
||||
*/
|
||||
|
||||
#include "dhry.h"
|
||||
|
||||
#ifndef REG
|
||||
#define REG
|
||||
/* REG becomes defined as empty */
|
||||
/* i.e. no register variables */
|
||||
#endif
|
||||
|
||||
extern int Int_Glob;
|
||||
extern char Ch_1_Glob;
|
||||
|
||||
|
||||
Proc_6 (Enum_Val_Par, Enum_Ref_Par)
|
||||
/*********************************/
|
||||
/* executed once */
|
||||
/* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
|
||||
|
||||
Enumeration Enum_Val_Par;
|
||||
Enumeration *Enum_Ref_Par;
|
||||
{
|
||||
*Enum_Ref_Par = Enum_Val_Par;
|
||||
if (! Func_3 (Enum_Val_Par))
|
||||
/* then, not executed */
|
||||
*Enum_Ref_Par = Ident_4;
|
||||
switch (Enum_Val_Par)
|
||||
{
|
||||
case Ident_1:
|
||||
*Enum_Ref_Par = Ident_1;
|
||||
break;
|
||||
case Ident_2:
|
||||
if (Int_Glob > 100)
|
||||
/* then */
|
||||
*Enum_Ref_Par = Ident_1;
|
||||
else *Enum_Ref_Par = Ident_4;
|
||||
break;
|
||||
case Ident_3: /* executed */
|
||||
*Enum_Ref_Par = Ident_2;
|
||||
break;
|
||||
case Ident_4: break;
|
||||
case Ident_5:
|
||||
*Enum_Ref_Par = Ident_3;
|
||||
break;
|
||||
} /* switch */
|
||||
} /* Proc_6 */
|
||||
|
||||
|
||||
Proc_7 (Int_1_Par_Val, Int_2_Par_Val, Int_Par_Ref)
|
||||
/**********************************************/
|
||||
/* executed three times */
|
||||
/* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */
|
||||
/* Int_Par_Ref becomes 7 */
|
||||
/* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
|
||||
/* Int_Par_Ref becomes 17 */
|
||||
/* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
|
||||
/* Int_Par_Ref becomes 18 */
|
||||
One_Fifty Int_1_Par_Val;
|
||||
One_Fifty Int_2_Par_Val;
|
||||
One_Fifty *Int_Par_Ref;
|
||||
{
|
||||
One_Fifty Int_Loc;
|
||||
|
||||
Int_Loc = Int_1_Par_Val + 2;
|
||||
*Int_Par_Ref = Int_2_Par_Val + Int_Loc;
|
||||
} /* Proc_7 */
|
||||
|
||||
|
||||
Proc_8 (Arr_1_Par_Ref, Arr_2_Par_Ref, Int_1_Par_Val, Int_2_Par_Val)
|
||||
/*********************************************************************/
|
||||
/* executed once */
|
||||
/* Int_Par_Val_1 == 3 */
|
||||
/* Int_Par_Val_2 == 7 */
|
||||
Arr_1_Dim Arr_1_Par_Ref;
|
||||
Arr_2_Dim Arr_2_Par_Ref;
|
||||
int Int_1_Par_Val;
|
||||
int Int_2_Par_Val;
|
||||
{
|
||||
REG One_Fifty Int_Index;
|
||||
REG One_Fifty Int_Loc;
|
||||
|
||||
Int_Loc = Int_1_Par_Val + 5;
|
||||
Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val;
|
||||
Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc];
|
||||
Arr_1_Par_Ref [Int_Loc+30] = Int_Loc;
|
||||
for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index)
|
||||
Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc;
|
||||
Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1;
|
||||
Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc];
|
||||
Int_Glob = 5;
|
||||
} /* Proc_8 */
|
||||
|
||||
|
||||
Enumeration Func_1 (Ch_1_Par_Val, Ch_2_Par_Val)
|
||||
/*************************************************/
|
||||
/* executed three times */
|
||||
/* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */
|
||||
/* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */
|
||||
/* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */
|
||||
|
||||
Capital_Letter Ch_1_Par_Val;
|
||||
Capital_Letter Ch_2_Par_Val;
|
||||
{
|
||||
Capital_Letter Ch_1_Loc;
|
||||
Capital_Letter Ch_2_Loc;
|
||||
|
||||
Ch_1_Loc = Ch_1_Par_Val;
|
||||
Ch_2_Loc = Ch_1_Loc;
|
||||
if (Ch_2_Loc != Ch_2_Par_Val)
|
||||
/* then, executed */
|
||||
return (Ident_1);
|
||||
else /* not executed */
|
||||
{
|
||||
Ch_1_Glob = Ch_1_Loc;
|
||||
return (Ident_2);
|
||||
}
|
||||
} /* Func_1 */
|
||||
|
||||
|
||||
Boolean Func_2 (Str_1_Par_Ref, Str_2_Par_Ref)
|
||||
/*************************************************/
|
||||
/* executed once */
|
||||
/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
|
||||
/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
|
||||
|
||||
Str_30 Str_1_Par_Ref;
|
||||
Str_30 Str_2_Par_Ref;
|
||||
{
|
||||
REG One_Thirty Int_Loc;
|
||||
Capital_Letter Ch_Loc;
|
||||
|
||||
Int_Loc = 2;
|
||||
while (Int_Loc <= 2) /* loop body executed once */
|
||||
if (Func_1 (Str_1_Par_Ref[Int_Loc],
|
||||
Str_2_Par_Ref[Int_Loc+1]) == Ident_1)
|
||||
/* then, executed */
|
||||
{
|
||||
Ch_Loc = 'A';
|
||||
Int_Loc += 1;
|
||||
} /* if, while */
|
||||
if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
|
||||
/* then, not executed */
|
||||
Int_Loc = 7;
|
||||
if (Ch_Loc == 'R')
|
||||
/* then, not executed */
|
||||
return (true);
|
||||
else /* executed */
|
||||
{
|
||||
if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
|
||||
/* then, not executed */
|
||||
{
|
||||
Int_Loc += 7;
|
||||
Int_Glob = Int_Loc;
|
||||
return (true);
|
||||
}
|
||||
else /* executed */
|
||||
return (false);
|
||||
} /* if Ch_Loc */
|
||||
} /* Func_2 */
|
||||
|
||||
|
||||
Boolean Func_3 (Enum_Par_Val)
|
||||
/***************************/
|
||||
/* executed once */
|
||||
/* Enum_Par_Val == Ident_3 */
|
||||
Enumeration Enum_Par_Val;
|
||||
{
|
||||
Enumeration Enum_Loc;
|
||||
|
||||
Enum_Loc = Enum_Par_Val;
|
||||
if (Enum_Loc == Ident_3)
|
||||
/* then, executed */
|
||||
return (true);
|
||||
else /* not executed */
|
||||
return (false);
|
||||
} /* Func_3 */
|
||||
|
56
FIRMWARE/DHRYSTONE/stubs.c
Normal file
56
FIRMWARE/DHRYSTONE/stubs.c
Normal file
@@ -0,0 +1,56 @@
|
||||
#include <stdint.h>
|
||||
#include <perf.h>
|
||||
|
||||
/* Returns the current value of rdcycle(). */
uint64_t time() {
    const uint64_t now = rdcycle();
    return now;
}
|
||||
|
||||
/* Returns the current value of rdinstret(). */
uint64_t insn() {
    const uint64_t retired = rdinstret();
    return retired;
}
|
||||
|
||||
/*
 * Copies the NUL-terminated string src, terminator included, into dest.
 * Returns dest. The buffers must not overlap and dest must be large enough.
 */
char *strcpy(char *dest, const char *src) {
    char *out = dest;
    for(;;) {
        const char c = *src++;
        *out++ = c;
        if(c == '\0') {
            break;
        }
    }
    return dest;
}
|
||||
|
||||
int strcmp (const char *p1, const char *p2) {
|
||||
const unsigned char *s1 = (const unsigned char *) p1;
|
||||
const unsigned char *s2 = (const unsigned char *) p2;
|
||||
unsigned char c1, c2;
|
||||
do {
|
||||
c1 = (unsigned char) *s1++;
|
||||
c2 = (unsigned char) *s2++;
|
||||
if (c1 == '\0') {
|
||||
return c1 - c2;
|
||||
}
|
||||
}
|
||||
while (c1 == c2);
|
||||
return c1 - c2;
|
||||
}
|
||||
|
||||
/*************************************************************/
|
||||
|
||||
// Print "fixed point" number (integer/1000)
|
||||
/*
 * Prints kx/1000 as a decimal number with exactly three fractional digits.
 * The fraction is zero-padded by hand, without a width specifier.
 */
void printk(uint64_t kx) {
    const int whole = (int)(kx / 1000);
    const int milli = (int)(kx % 1000);
    printf("%d.",whole);
    if(milli<100) {
        printf("0");
        /* a fraction below 10 needs a second leading zero */
        if(milli<10) {
            printf("0");
        }
    }
    printf("%d",milli);
}
|
||||
|
||||
/*
 * Samples rdinstret() then rdcycle() and prints the cycles-per-instruction
 * ratio (three decimals, via printk) followed by both raw counters
 * truncated to int.
 */
void show_CPI_2() {
    const uint64_t retired = rdinstret();
    const uint64_t elapsed = rdcycle();
    const uint64_t cpi_x1000 = elapsed*1000/retired;
    printf(">>> CPI =");
    printk(cpi_x1000);
    printf("\n");
    printf(">>> instret = %d\n", (int)(retired));
    printf(">>> cycles = %d\n", (int)(elapsed));
}
|
460
FIRMWARE/GL_tty.h
Normal file
460
FIRMWARE/GL_tty.h
Normal file
@@ -0,0 +1,460 @@
|
||||
/**
|
||||
* ansi_graphics.h
|
||||
* A couple of functions to display graphics in the terminal,
|
||||
* using ansi sequences.
|
||||
* Bruno Levy, Jan 2024
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifndef GL_FPS
|
||||
#define GL_FPS 30
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) || defined(_WIN32) || defined(__APPLE__)
|
||||
#define BIGCPU // we are compiling for a real machine
|
||||
#else
|
||||
#define TINYCPU // we are compiling for a softcore
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h> // for usleep()
|
||||
#endif
|
||||
|
||||
// You can define GL_width and GL_height before
|
||||
// #including ansi_graphics.h in case the plain
|
||||
// old 80x25 pixels does not suffice.
|
||||
|
||||
#ifndef GL_width
|
||||
#define GL_width 80
|
||||
#endif
|
||||
|
||||
#ifndef GL_height
|
||||
#define GL_height 25
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \brief Sets the current graphics position
|
||||
* \param[in] x typically in 0,79
|
||||
* \param[in] y typically in 0,24
|
||||
*/
|
||||
static inline void GL_gotoxy(int x, int y) {
    // ANSI "cursor position" sequence: the row comes first, then the column.
    // NOTE(review): ANSI rows/columns are 1-based while callers pass
    // 0-based values; terminals usually treat 0 like 1 - confirm.
    const int row = y;
    const int col = x;
    printf("\033[%d;%dH",row,col);
}
|
||||
|
||||
/**
|
||||
* \brief Draws a pixel at the current cursor position and advances the cursor
|
||||
* \param[in] R , G , B the RGB color of the pixel, in [0..255]
|
||||
* \details Typically used by programs that draw all pixels sequentially,
|
||||
* like a raytracer. After each line, one can either printf("\n") or
|
||||
* call GL_gotoxy(). If you want to draw individual pixels in an
|
||||
* arbitrary order, use GL_setpixelRGB(x,y,R,G,B)
|
||||
*/
|
||||
static inline void GL_setpixelRGBhere(uint8_t R, uint8_t G, uint8_t B) {
    // A "pixel" is one space printed on a 24-bit background color.
    const int red   = (int)R;
    const int green = (int)G;
    const int blue  = (int)B;
    printf("\033[48;2;%d;%d;%dm ",red,green,blue);
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Draws two "pixels" at the current
|
||||
* cursor position and advances the current cursor
|
||||
* position.
|
||||
* \details Characters are roughly twice as high as wide.
|
||||
* To generate square pixels, this function draws two pixels in
|
||||
* the same character, using the special lower-half white / upper-half
|
||||
* black character, and setting the background and foreground colors.
|
||||
*/
|
||||
static inline void GL_set2pixelsRGBhere(
    uint8_t r1, uint8_t g1, uint8_t b1,
    uint8_t r2, uint8_t g2, uint8_t b2
) {
    if((r2 == r1) && (g2 == g1) && (b2 == b1)) {
        // Both halves share one color: a plain colored space is enough.
        GL_setpixelRGBhere(r1,g1,b1);
    } else {
        // Background color = upper pixel (r1,g1,b1),
        // foreground color = lower pixel (r2,g2,b2).
        printf("\033[48;2;%d;%d;%dm",(int)r1,(int)g1,(int)b1);
        printf("\033[38;2;%d;%d;%dm",(int)r2,(int)g2,(int)b2);
        // https://www.w3.org/TR/xml-entity-names/025.html
        // https://onlineunicodetools.com/convert-unicode-to-utf8
        // https://copypastecharacter.com/
        // U+2584 LOWER HALF BLOCK, UTF-8 encoded. The previous bytes
        // (E2 96 83) encode U+2583 LOWER THREE EIGHTHS BLOCK, which made
        // the lower pixel smaller than the upper one.
        printf("\xE2\x96\x84");
    }
}
|
||||
|
||||
#define GL_RGB(R,G,B) #R ";" #G ";" #B
|
||||
|
||||
// Draws one pixel at the cursor using entry c of a colormap whose entries
// are "R;G;B" strings (the format produced by the GL_RGB() macro).
// c is not range-checked.
static inline void GL_setpixelIhere(
    const char** cmap, int c
) {
    const char* rgb = cmap[c];
    // set background color, print space
    printf("\033[48;2;%sm ",rgb);
}
|
||||
|
||||
// Draws two vertically stacked pixels in one character cell, using entries
// c1 (upper pixel) and c2 (lower pixel) of a colormap whose entries are
// "R;G;B" strings. c1 and c2 are not range-checked.
static inline void GL_set2pixelsIhere(
    const char** cmap, int c1, int c2
) {
    if(c1 == c2) {
        // Both halves share one color: a plain colored space is enough.
        GL_setpixelIhere(cmap, c1);
    } else {
        // Background color = upper pixel, foreground color = lower pixel.
        printf("\033[48;2;%sm",cmap[c1]);
        printf("\033[38;2;%sm",cmap[c2]);
        // https://www.w3.org/TR/xml-entity-names/025.html
        // https://onlineunicodetools.com/convert-unicode-to-utf8
        // https://copypastecharacter.com/
        // U+2584 LOWER HALF BLOCK, UTF-8 encoded. The previous bytes
        // (E2 96 83) encode U+2583 LOWER THREE EIGHTHS BLOCK, which made
        // the lower pixel smaller than the upper one.
        printf("\xE2\x96\x84");
    }
}
|
||||
|
||||
/**
|
||||
* \brief Moves the cursor position to the next line.
|
||||
* \details Background and foreground colors are set to black.
|
||||
*/
|
||||
static inline void GL_newline() {
    printf(
        "\033[38;2;0;0;0m"   // foreground color: black
        "\033[48;2;0;0;0m\n" // background color: black, then line feed
    );
}
|
||||
|
||||
/**
|
||||
* \brief Sets the color of a pixel
|
||||
* \param[in] x typically in 0,79
|
||||
* \param[in] y typically in 0,24
|
||||
* \param[in] R , G , B the RGB color of the pixel, in [0..255]
|
||||
*/
|
||||
static inline void GL_setpixelRGB(
    int x, int y, uint8_t R, uint8_t G, uint8_t B
) {
    GL_gotoxy(x,y);            // move the cursor to the pixel ...
    GL_setpixelRGBhere(R,G,B); // ... and paint it there
}
|
||||
|
||||
/**
|
||||
* \brief restore default foreground and background colors
|
||||
*/
|
||||
static inline void GL_restore_default_colors() {
    // 256-color palette: entry 16 as background, entry 15 as foreground
    printf("\033[48;5;16m"); // set background color black
    printf("\033[38;5;15m"); // set foreground color white
}
|
||||
|
||||
/**
|
||||
* \brief Call this function each time graphics should be cleared
|
||||
*/
|
||||
static inline void GL_clear() {
    // Colors are restored first so the cleared screen uses the defaults.
    GL_restore_default_colors();
    printf("\033[2J"); // clear screen
}
|
||||
|
||||
/**
|
||||
* \brief Moves current drawing position to top-left corner
|
||||
* \see GL_setpixelRGBhere() and GL_set2pixelsRGBhere()
|
||||
*/
|
||||
static inline void GL_home() {
    printf("\033[H"); // cursor-position sequence with no row/column given
}
|
||||
|
||||
/**
|
||||
* \brief Call this function before starting drawing graphics
|
||||
* or each time graphics should be cleared
|
||||
*/
|
||||
static inline void GL_init() {
    printf("\033[?25l"); // hide cursor
    GL_home();           // cursor to the top-left corner
    GL_clear();          // default colors, then clear the screen
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Call this function at the end of the program
|
||||
*/
|
||||
static inline void GL_terminate() {
|
||||
GL_restore_default_colors();
|
||||
GL_gotoxy(0,GL_height);
|
||||
printf("\033[?25h"); // show cursor
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Flushes pending graphic operations and waits a bit
|
||||
*/
|
||||
static inline void GL_swapbuffers() {
|
||||
// only flush if we are on a big machine, with true stdio support
|
||||
// otherwise does nothing (because our small MCU io lib is not buffered)
|
||||
#ifdef BIGCPU
|
||||
fflush(stdout);
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
usleep(1000000/GL_FPS);
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef void (*GL_pixelfunc_RGB)(int x, int y, uint8_t* r, uint8_t* g, uint8_t* b);
|
||||
typedef void (*GL_pixelfunc_RGBf)(int x, int y, float* r, float* g, float* b);
|
||||
|
||||
/**
|
||||
* \brief Draws an image by calling a user-specified function for each pixel.
|
||||
* \param[in] width , height dimension of the image in square pixels
|
||||
* \param[in] do_pixel the user function to be called for each pixel
|
||||
* (a "shader"), that determines the (integer) components r,g,b of
|
||||
* the pixel's color.
|
||||
* \details Uses half-character pixels.
|
||||
*/
|
||||
static inline void GL_scan_RGB(
    int width, int height, GL_pixelfunc_RGB do_pixel
) {
    uint8_t top_r, top_g, top_b; // pixel (col,row), upper half of the cell
    uint8_t bot_r, bot_g, bot_b; // pixel (col,row+1), lower half of the cell
    const int last_col = width-1;
    GL_home();
    // Each character row shows two image rows.
    // NOTE: when height is odd, do_pixel is also called with y == height.
    for (int row = 0; row<height; row+=2) {
        for (int col = 0; col<width; col++) {
            do_pixel(col,row  , &top_r, &top_g, &top_b);
            do_pixel(col,row+1, &bot_r, &bot_g, &bot_b);
            GL_set2pixelsRGBhere(top_r,top_g,top_b,bot_r,bot_g,bot_b);
            if(col == last_col) {
                GL_newline(); // end of the character row
            }
        }
    }
}
|
||||
|
||||
/**
|
||||
* \brief Converts a floating point value to a byte.
|
||||
* \param[in] f the floating point value in [0,1]
|
||||
* \return the byte, in [0,255]
|
||||
* \details the input value is clamped to [0,1]
|
||||
*/
|
||||
static inline uint8_t GL_ftoi(float f) {
    // Negative values, zero and NaN all map to 0. Testing !(f > 0) instead
    // of (f < 0) catches NaN, which would otherwise reach the float-to-
    // integer conversion below, where the result is undefined.
    if(!(f > 0.0f)) {
        return 0;
    }
    // Values above 1 are clamped to the maximum.
    if(f > 1.0f) {
        return 255;
    }
    return (uint8_t)(255.0f * f);
}
|
||||
|
||||
/**
|
||||
* \brief Draws an image by calling a user-specified function for each pixel.
|
||||
* \param[in] width , height dimension of the image in square pixels
|
||||
* \param[in] do_pixel the user function to be called for each pixel
|
||||
* (a "shader"), that determines the (floating-point) components
|
||||
* fr,fg,fb of the pixel's color.
|
||||
* \details Uses half-character pixels.
|
||||
*/
|
||||
static inline void GL_scan_RGBf(
    int width, int height, GL_pixelfunc_RGBf do_pixel
) {
    float top_r, top_g, top_b; // pixel (col,row), upper half of the cell
    float bot_r, bot_g, bot_b; // pixel (col,row+1), lower half of the cell
    const int last_col = width-1;
    GL_home();
    // Each character row shows two image rows.
    // NOTE: when height is odd, do_pixel is also called with y == height.
    for (int row = 0; row<height; row+=2) {
        for (int col = 0; col<width; col++) {
            do_pixel(col,row  , &top_r, &top_g, &top_b);
            do_pixel(col,row+1, &bot_r, &bot_g, &bot_b);
            // Components are clamped and converted to bytes by GL_ftoi().
            GL_set2pixelsRGBhere(
                GL_ftoi(top_r), GL_ftoi(top_g), GL_ftoi(top_b),
                GL_ftoi(bot_r), GL_ftoi(bot_g), GL_ftoi(bot_b)
            );
            if(col == last_col) {
                GL_newline(); // end of the character row
            }
        }
    }
}
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
#define INSIDE 0
|
||||
#define LEFT 1
|
||||
#define RIGHT 2
|
||||
#define BOTTOM 4
|
||||
#define TOP 8
|
||||
|
||||
#define XMIN 0
|
||||
#define XMAX (GL_width-1)
|
||||
#define YMIN 0
|
||||
#define YMAX (GL_height-1)
|
||||
|
||||
/* Cohen-Sutherland region code of point (x,y): bitwise OR of LEFT, RIGHT,
 * BOTTOM, TOP (0 == INSIDE). The whole expansion is parenthesized so the
 * macro can be used safely inside a larger expression. */
#define code(x,y) \
    (((x) < XMIN) | (((x) > XMAX)<<1) | (((y) < YMIN)<<2) | (((y) > YMAX)<<3))
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
static inline void GL_line(
|
||||
int x1, int y1, int x2, int y2, int R, int G, int B
|
||||
) {
|
||||
int x,y,dx,dy,sx,sy,tmp;
|
||||
|
||||
/* Cohen-Sutherland line clipping. */
|
||||
int code1 = code(x1,y1);
|
||||
int code2 = code(x2,y2);
|
||||
int codeout;
|
||||
|
||||
for(;;) {
|
||||
/* Both points inside. */
|
||||
if(code1 == 0 && code2 == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* No point inside. */
|
||||
if(code1 & code2) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* One of the points is outside. */
|
||||
codeout = code1 ? code1 : code2;
|
||||
|
||||
/* Compute intersection. */
|
||||
if (codeout & TOP) {
|
||||
x = x1 + (x2 - x1) * (YMAX - y1) / (y2 - y1);
|
||||
y = YMAX;
|
||||
} else if (codeout & BOTTOM) {
|
||||
x = x1 + (x2 - x1) * (YMIN - y1) / (y2 - y1);
|
||||
y = YMIN;
|
||||
} else if (codeout & RIGHT) {
|
||||
y = y1 + (y2 - y1) * (XMAX - x1) / (x2 - x1);
|
||||
x = XMAX;
|
||||
} else if (codeout & LEFT) {
|
||||
y = y1 + (y2 - y1) * (XMIN - x1) / (x2 - x1);
|
||||
x = XMIN;
|
||||
}
|
||||
|
||||
/* Replace outside point with intersection. */
|
||||
if (codeout == code1) {
|
||||
x1 = x;
|
||||
y1 = y;
|
||||
code1 = code(x1,y1);
|
||||
} else {
|
||||
x2 = x;
|
||||
y2 = y;
|
||||
code2 = code(x2,y2);
|
||||
}
|
||||
}
|
||||
|
||||
// Swap both extremities to ensure x increases
|
||||
if(x2 < x1) {
|
||||
tmp = x2;
|
||||
x2 = x1;
|
||||
x1 = tmp;
|
||||
tmp = y2;
|
||||
y2 = y1;
|
||||
y1 = tmp;
|
||||
}
|
||||
|
||||
// Bresenham line drawing.
|
||||
dy = y2 - y1;
|
||||
sy = 1;
|
||||
if(dy < 0) {
|
||||
sy = -1;
|
||||
dy = -dy;
|
||||
}
|
||||
|
||||
dx = x2 - x1;
|
||||
|
||||
x = x1;
|
||||
y = y1;
|
||||
|
||||
if(dy > dx) {
|
||||
int ex = (dx << 1) - dy;
|
||||
for(int u=0; u<dy; u++) {
|
||||
GL_setpixelRGB(x,y,R,G,B);
|
||||
y += sy;
|
||||
if(ex >= 0) {
|
||||
x++;
|
||||
ex -= dy << 1;
|
||||
GL_setpixelRGB(x,y,R,G,B);
|
||||
}
|
||||
while(ex >= 0) {
|
||||
x++;
|
||||
ex -= dy << 1;
|
||||
putchar(' ');
|
||||
}
|
||||
ex += dx << 1;
|
||||
}
|
||||
} else {
|
||||
int ey = (dy << 1) - dx;
|
||||
for(int u=0; u<dx; u++) {
|
||||
GL_setpixelRGB(x,y,R,G,B);
|
||||
x++;
|
||||
while(ey >= 0) {
|
||||
y += sy;
|
||||
ey -= dx << 1;
|
||||
GL_setpixelRGB(x,y,R,G,B);
|
||||
}
|
||||
ey += dy << 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
#ifdef GL_USE_TURTLE

#include "sintab.h" // Ugly !!!

/* State of a LOGO-style drawing turtle. */
typedef struct {
    int x;       // current X position (Turtle_init() starts at GL_width/2)
    int y;       // current Y position (Turtle_init() starts at GL_height/2)
    int angle;   // heading, in degrees (any integer, normalized when moving)
    int R,G,B;   // pen color
    int pendown; // draw if non-zero
} Turtle;

/* Puts the turtle at the screen center, heading -90 degrees, white pen down. */
static inline void Turtle_init(Turtle* T) {
    T->x = GL_width/2;
    T->y = GL_height/2;
    T->angle = -90;
    T->pendown = 1;
    T->R = 255;
    T->G = 255;
    T->B = 255;
}

/* Lifts the pen: subsequent moves do not draw. */
static inline void Turtle_pen_up(Turtle* T) {
    T->pendown = 0;
}

/* Lowers the pen: subsequent moves draw a line. */
static inline void Turtle_pen_down(Turtle* T) {
    T->pendown = 1;
}

/* Sets the pen color used by subsequent moves. */
static inline void Turtle_pen_color(Turtle* T, int R, int G, int B) {
    T->R = R;
    T->G = G;
    T->B = B;
}

/*
 * Moves the turtle by 'distance' along its current heading (a negative
 * distance moves backward) and, if the pen is down, draws the traveled
 * segment with GL_line() in the pen color.
 */
static inline void Turtle_forward(Turtle* T, int distance) {
    int last_x = T->x;
    int last_y = T->y;
    int a = T->angle;

    // Bring the heading into [0,359] before using it as a table index.
    // 360 degrees is the same direction as 0, and index 360 is only valid
    // if the tables hold 361 entries (sintab.h is not visible from here),
    // so it is folded back to 0 as well.
    while(a < 0) {
        a += 360;
    }
    while(a >= 360) {
        a -= 360;
    }

    // The table values are presumably scaled by 256 (hence the division);
    // confirm against sintab.h.
    T->x += (costab[a] * distance) / 256;
    T->y += (sintab[a] * distance) / 256;
    if(T->pendown) {
        GL_line(last_x, last_y, T->x, T->y, T->R, T->G, T->B);
    }
}

/* Moves the turtle by 'distance' in the direction opposite to its heading. */
static inline void Turtle_backward(Turtle* T, int distance) {
    Turtle_forward(T,-distance);
}

/* Adds delta_angle (degrees) to the heading. */
static inline void Turtle_turn_right(Turtle* T, int delta_angle) {
    T->angle += delta_angle;
}

/* Subtracts delta_angle (degrees) from the heading. */
static inline void Turtle_turn_left(Turtle* T, int delta_angle) {
    Turtle_turn_right(T, -delta_angle);
}

#endif
|
80
FIRMWARE/Makefile
Normal file
80
FIRMWARE/Makefile
Normal file
@@ -0,0 +1,80 @@
|
||||
include ../../../FIRMWARE/makefile.inc
|
||||
RVASFLAGS=-march=$(ARCH) -mabi=$(ABI)
|
||||
RVCFLAGS=-I. -O2 -fno-pic -march=$(ARCH) -mabi=$(ABI) -fno-stack-protector -w -Wl,--no-relax
|
||||
|
||||
RAM_SIZE=6144
|
||||
|
||||
LIBOBJECTS=putchar.o wait.o print.o memcpy.o errno.o perf.o
|
||||
|
||||
%.bram.elf: %.o start.o $(LIBOBJECTS) $(RV_BINARIES)
|
||||
$(RVLD) -T bram.ld -m elf32lriscv -nostdlib -norelax $< $(LIBOBJECTS) $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@
|
||||
|
||||
|
||||
%.hex: %.elf $(FIRMWARE_DIR)/TOOLS/firmware_words
|
||||
$(FIRMWARE_DIR)/TOOLS/firmware_words $< -ram $(RAM_SIZE) -max_addr $(RAM_SIZE) -out $@
|
||||
cp $@ ../firmware.hex
|
||||
mkdir -p ../obj_dir
|
||||
cp $@ ../obj_dir/firmware.hex
|
||||
echo $@ > ../firmware.txt
|
||||
|
||||
|
||||
# SPI FLASH 0 (sends everything to SPI flash)
|
||||
|
||||
%.spiflash0.elf: %.o start.o $(LIBOBJECTS) $(RV_BINARIES)
|
||||
$(RVLD) -T spiflash0.ld -m elf32lriscv -nostdlib -norelax $< $(LIBOBJECTS) $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@
|
||||
|
||||
%.spiflash0.bin: %.spiflash0.elf
|
||||
$(RVOBJCOPY) $< $@ -O binary
|
||||
|
||||
%.spiflash0.prog: %.spiflash0.bin
|
||||
iceprog -o 128k $<
|
||||
|
||||
# SPI FLASH 1 (sends code and variables initialization to SPI flash, variables to RAM)
|
||||
|
||||
%.spiflash1.elf: %.o start_spiflash1.o $(LIBOBJECTS) $(RV_BINARIES)
|
||||
$(RVLD) -T spiflash1.ld -m elf32lriscv -nostdlib -norelax $< $(LIBOBJECTS) $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@
|
||||
|
||||
|
||||
%.spiflash1.bin: %.spiflash1.elf
|
||||
$(RVOBJCOPY) $< $@ -O binary
|
||||
|
||||
%.spiflash1.prog: %.spiflash1.bin
|
||||
iceprog -o 128k $<
|
||||
|
||||
|
||||
# SPI FLASH 2 (sends code and variables initialization to SPI flash, variables and fastcode to RAM)
|
||||
|
||||
%.spiflash2.elf: %.o start_spiflash1.o $(LIBOBJECTS) $(RV_BINARIES)
|
||||
$(RVLD) -T spiflash2.ld -m elf32lriscv -nostdlib -norelax $< $(LIBOBJECTS) -L$(RVTOOLCHAIN_LIB_DIR) -lm $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@
|
||||
|
||||
|
||||
%.spiflash2.bin: %.spiflash2.elf
|
||||
$(RVOBJCOPY) $< $@ -O binary
|
||||
|
||||
%.spiflash2.prog: %.spiflash2.bin
|
||||
iceprog -o 128k $<
|
||||
|
||||
%.spiflash2.list: %.spiflash2.elf
|
||||
$(RVOBJDUMP) -Mnumeric -D $< > $@
|
||||
|
||||
# DUAL MEMORY (64 kb program ROM, 64 kb data RAM)
|
||||
|
||||
%.pipeline.elf: %.o start_pipeline.o $(LIBOBJECTS) $(RV_BINARIES)
|
||||
$(RVLD) -T pipeline.ld -m elf32lriscv -nostdlib -norelax $< $(LIBOBJECTS) -L$(RVTOOLCHAIN_LIB_DIR) -lm $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@
|
||||
$(RVOBJDUMP) -Mnumeric -D $@ > $@.list
|
||||
|
||||
%.PROGROM.hex: %.pipeline.elf $(FIRMWARE_DIR)/TOOLS/firmware_words
|
||||
$(FIRMWARE_DIR)/TOOLS/firmware_words $< -ram 0x20000 -max_addr 0x20000 -out $@ -from_addr 0 -to_addr 0xFFFF
|
||||
cp $@ ../PROGROM.hex
|
||||
mkdir -p ../obj_dir
|
||||
cp $@ ../obj_dir/PROGROM.hex
|
||||
|
||||
%.DATARAM.hex: %.pipeline.elf $(FIRMWARE_DIR)/TOOLS/firmware_words
|
||||
$(FIRMWARE_DIR)/TOOLS/firmware_words $< -ram 0x20000 -max_addr 0x20000 -out $@ -from_addr 0x10000 -to_addr 0x1FFFF
|
||||
cp $@ ../DATARAM.hex
|
||||
mkdir -p ../obj_dir
|
||||
cp $@ ../obj_dir/DATARAM.hex
|
||||
|
||||
%.pipeline.hex: %.PROGROM.hex %.DATARAM.hex
|
||||
echo $@ > ../firmware.txt
|
||||
|
4096
FIRMWARE/PRECOMPILED/RV32I/COREMARK/DATARAM.hex
Normal file
4096
FIRMWARE/PRECOMPILED/RV32I/COREMARK/DATARAM.hex
Normal file
File diff suppressed because it is too large
Load Diff
4096
FIRMWARE/PRECOMPILED/RV32I/COREMARK/PROGROM.hex
Normal file
4096
FIRMWARE/PRECOMPILED/RV32I/COREMARK/PROGROM.hex
Normal file
File diff suppressed because it is too large
Load Diff
4096
FIRMWARE/PRECOMPILED/RV32I/DHRYSTONES/DATARAM.hex
Normal file
4096
FIRMWARE/PRECOMPILED/RV32I/DHRYSTONES/DATARAM.hex
Normal file
File diff suppressed because it is too large
Load Diff
4096
FIRMWARE/PRECOMPILED/RV32I/DHRYSTONES/PROGROM.hex
Normal file
4096
FIRMWARE/PRECOMPILED/RV32I/DHRYSTONES/PROGROM.hex
Normal file
File diff suppressed because it is too large
Load Diff
4096
FIRMWARE/PRECOMPILED/RV32I/RAYSTONES/DATARAM.hex
Normal file
4096
FIRMWARE/PRECOMPILED/RV32I/RAYSTONES/DATARAM.hex
Normal file
File diff suppressed because it is too large
Load Diff
4096
FIRMWARE/PRECOMPILED/RV32I/RAYSTONES/PROGROM.hex
Normal file
4096
FIRMWARE/PRECOMPILED/RV32I/RAYSTONES/PROGROM.hex
Normal file
File diff suppressed because it is too large
Load Diff
4096
FIRMWARE/PRECOMPILED/RV32IM/COREMARK/DATARAM.hex
Normal file
4096
FIRMWARE/PRECOMPILED/RV32IM/COREMARK/DATARAM.hex
Normal file
File diff suppressed because it is too large
Load Diff
4096
FIRMWARE/PRECOMPILED/RV32IM/COREMARK/PROGROM.hex
Normal file
4096
FIRMWARE/PRECOMPILED/RV32IM/COREMARK/PROGROM.hex
Normal file
File diff suppressed because it is too large
Load Diff
4096
FIRMWARE/PRECOMPILED/RV32IM/DHRYSTONES/DATARAM.hex
Normal file
4096
FIRMWARE/PRECOMPILED/RV32IM/DHRYSTONES/DATARAM.hex
Normal file
File diff suppressed because it is too large
Load Diff
4096
FIRMWARE/PRECOMPILED/RV32IM/DHRYSTONES/PROGROM.hex
Normal file
4096
FIRMWARE/PRECOMPILED/RV32IM/DHRYSTONES/PROGROM.hex
Normal file
File diff suppressed because it is too large
Load Diff
4096
FIRMWARE/PRECOMPILED/RV32IM/RAYSTONES/DATARAM.hex
Normal file
4096
FIRMWARE/PRECOMPILED/RV32IM/RAYSTONES/DATARAM.hex
Normal file
File diff suppressed because it is too large
Load Diff
4096
FIRMWARE/PRECOMPILED/RV32IM/RAYSTONES/PROGROM.hex
Normal file
4096
FIRMWARE/PRECOMPILED/RV32IM/RAYSTONES/PROGROM.hex
Normal file
File diff suppressed because it is too large
Load Diff
480
FIRMWARE/ST_NICCC.c
Normal file
480
FIRMWARE/ST_NICCC.c
Normal file
@@ -0,0 +1,480 @@
|
||||
/*
|
||||
* Reading the ST-NICCC megademo data stored in
|
||||
* the SPI flash and streaming it to polygons,
|
||||
* rendered as ANSI character sequences through
|
||||
* the UART.
|
||||
*
|
||||
* The polygon stream is a 640K file, that needs
|
||||
* to be stored in the SPI flash, using:
|
||||
* ICEStick: iceprog -o 1M EXAMPLES/DATA/scene1.dat
|
||||
* ULX3S: cp EXAMPLES/DATA/scene1.dat scene1.img
|
||||
* ujprog -j flash -f 1048576 scene1.img
|
||||
* (using latest version of ujprog compiled from https://github.com/kost/fujprog)
|
||||
*
|
||||
* More details and links in EXAMPLES/DATA/notes.txt
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#else
|
||||
#include "io.h"
|
||||
#endif
|
||||
|
||||
// when compiling for SPI flash, uncomment to fit some routines in fast BRAM
|
||||
// (but it does not change much, the bottleneck is ANSI RGB encoding and uart.
|
||||
//#define RV32_FASTCODE __attribute((section(".fastcode")))
|
||||
#define RV32_FASTCODE
|
||||
|
||||
// when compiling for SPI flash, uncomment to enable wireframe mode (but it is ugly
|
||||
// and it will not fit in BRAM !)
|
||||
// #define WITH_WIREFRAME
|
||||
|
||||
#ifdef WITH_WIREFRAME
|
||||
int wireframe = 0;
|
||||
#endif
|
||||
|
||||
#define MIN(x,y) ((x) < (y) ? (x) : (y))
|
||||
#define MAX(x,y) ((x) > (y) ? (x) : (y))
|
||||
|
||||
|
||||
/**********************************************************************************/
|
||||
/* Graphics routines */
|
||||
/**********************************************************************************/
|
||||
|
||||
|
||||
// Map coordinates from file to screen
|
||||
|
||||
/* Maps an X coordinate of the file (0..255) to a screen column: halves it. */
static inline uint8_t map_x(uint8_t x) {
    return (uint8_t)(x / 2);
}
|
||||
|
||||
/* Maps a Y coordinate of the file (0..255) to a screen row: divides it by 4. */
static inline uint8_t map_y(uint8_t y) {
    return (uint8_t)(y / 4);
}
|
||||
|
||||
/*
 * Clears the terminal: selects black (color 16 of the 256-color palette)
 * as background, then erases the whole screen.
 */
void GL_clear() {
    printf("\033[48;5;16m\033[2J");
}
|
||||
|
||||
/*
|
||||
* Set background color using 6x6x6 colorcube codes
|
||||
* see https://stackoverflow.com/questions/4842424/list-of-ansi-color-escape-sequences
|
||||
*/
|
||||
/*
 * Selects 'color' (a 256-color palette index) as the terminal background.
 * The escape sequence is only sent when the color differs from the one
 * requested by the previous call.
 */
static inline void GL_setcolor(int color) {
    static int current = -1;   /* color of the previous call, -1 = none yet */
    if(color == current) {
        return;
    }
    printf("\033[48;5;%dm",color);
    current = color;
}
|
||||
|
||||
/*
 * "Plots" one pixel: moves the terminal cursor to row y, column x and prints
 * a space there (which shows up in the current background color).
 */
static inline void GL_setpixel(int x, int y) {
    printf("\033[%d;%dH ",y,x); // Goto_XY(x,y) and print space
}
|
||||
|
||||
#ifdef WITH_WIREFRAME
|
||||
void GL_line(int x1, int y1, int x2, int y2) RV32_FASTCODE;
|
||||
void GL_line(int x1, int y1, int x2, int y2) {
|
||||
int x,y,dx,dy,sy,tmp;
|
||||
|
||||
// Swap both extremities to ensure x increases
|
||||
if(x2 < x1) {
|
||||
tmp = x2;
|
||||
x2 = x1;
|
||||
x1 = tmp;
|
||||
tmp = y2;
|
||||
y2 = y1;
|
||||
y1 = tmp;
|
||||
}
|
||||
|
||||
// Bresenham line drawing.
|
||||
dy = y2 - y1;
|
||||
sy = 1;
|
||||
if(dy < 0) {
|
||||
sy = -1;
|
||||
dy = -dy;
|
||||
}
|
||||
|
||||
dx = x2 - x1;
|
||||
|
||||
x = x1;
|
||||
y = y1;
|
||||
|
||||
if(dy > dx) {
|
||||
int ex = (dx << 1) - dy;
|
||||
for(int u=0; u<dy; u++) {
|
||||
GL_setpixel(x,y);
|
||||
y += sy;
|
||||
if(ex >= 0) {
|
||||
x++;
|
||||
ex -= dy << 1;
|
||||
GL_setpixel(x,y);
|
||||
}
|
||||
while(ex >= 0) {
|
||||
x++;
|
||||
ex -= dy << 1;
|
||||
putchar(' ');
|
||||
}
|
||||
ex += dx << 1;
|
||||
}
|
||||
} else {
|
||||
int ey = (dy << 1) - dx;
|
||||
for(int u=0; u<dx; u++) {
|
||||
GL_setpixel(x,y);
|
||||
x++;
|
||||
while(ey >= 0) {
|
||||
y += sy;
|
||||
ey -= dx << 1;
|
||||
GL_setpixel(x,y);
|
||||
}
|
||||
ey += dy << 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void GL_fillpoly(int nb_pts, int* points) RV32_FASTCODE;
|
||||
void GL_fillpoly(int nb_pts, int* points) {
|
||||
static int last_color = -1;
|
||||
|
||||
char x_left[128];
|
||||
char x_right[128];
|
||||
|
||||
/* Determine clockwise, miny, maxy */
|
||||
int clockwise = 0;
|
||||
int miny = 256;
|
||||
int maxy = -256;
|
||||
|
||||
for(int i1=0; i1<nb_pts; ++i1) {
|
||||
int i2=(i1==nb_pts-1) ? 0 : i1+1;
|
||||
int i3=(i2==nb_pts-1) ? 0 : i2+1;
|
||||
int x1 = points[2*i1];
|
||||
int y1 = points[2*i1+1];
|
||||
int dx1 = points[2*i2] - x1;
|
||||
int dy1 = points[2*i2+1] - y1;
|
||||
int dx2 = points[2*i3] - x1;
|
||||
int dy2 = points[2*i3+1] - y1;
|
||||
clockwise += dx1 * dy2 - dx2 * dy1;
|
||||
miny = MIN(miny,y1);
|
||||
maxy = MAX(maxy,y1);
|
||||
}
|
||||
|
||||
/* Determine x_left and x_right for each scaline */
|
||||
for(int i1=0; i1<nb_pts; ++i1) {
|
||||
int i2=(i1==nb_pts-1) ? 0 : i1+1;
|
||||
|
||||
int x1 = points[2*i1];
|
||||
int y1 = points[2*i1+1];
|
||||
int x2 = points[2*i2];
|
||||
int y2 = points[2*i2+1];
|
||||
|
||||
#ifdef WITH_WIREFRAME
|
||||
if(wireframe) {
|
||||
if((clockwise > 0) ^ (y2 > y1)) {
|
||||
GL_line(x1,y1,x2,y2);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
char* x_buffer = ((clockwise > 0) ^ (y2 > y1)) ? x_left : x_right;
|
||||
int dx = x2 - x1;
|
||||
int sx = 1;
|
||||
int dy = y2 - y1;
|
||||
int sy = 1;
|
||||
int x = x1;
|
||||
int y = y1;
|
||||
int ex;
|
||||
|
||||
if(dx < 0) {
|
||||
sx = -1;
|
||||
dx = -dx;
|
||||
}
|
||||
|
||||
if(dy < 0) {
|
||||
sy = -1;
|
||||
dy = -dy;
|
||||
}
|
||||
|
||||
if(y1 == y2) {
|
||||
x_left[y1] = MIN(x1,x2);
|
||||
x_right[y1] = MAX(x1,x2);
|
||||
continue;
|
||||
}
|
||||
|
||||
ex = (dx << 1) - dy;
|
||||
|
||||
for(int u=0; u <= dy; ++u) {
|
||||
x_buffer[y] = x;
|
||||
y += sy;
|
||||
while(ex >= 0) {
|
||||
x += sx;
|
||||
ex -= dy << 1;
|
||||
}
|
||||
ex += dx << 1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WITH_WIREFRAME
|
||||
if(!wireframe)
|
||||
#endif
|
||||
{
|
||||
for(int y = miny; y <= maxy; ++y) {
|
||||
int x1 = x_left[y];
|
||||
int x2 = x_right[y];
|
||||
printf("\033[%d;%dH",y,x1); // Goto_XY(x1,y)
|
||||
for(int x=x1; x<x2; ++x) {
|
||||
putchar(' ');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************************/
|
||||
|
||||
/*
|
||||
* Starting address of data stream stored in the
|
||||
* SPI.
|
||||
* I put the data stream starting from 1M offset,
|
||||
* just to make sure it does not collide with
|
||||
* FPGA wiring configuration ! (but FPGA configuration
|
||||
* only takes a few tens of kilobytes I think).
|
||||
* Using the IO interface, it is using the physical address
|
||||
* (starting at 1M). Using the mapped memory interface,
|
||||
* SPI_FLASH_BASE is mapped to 1M.
|
||||
*/
|
||||
uint32_t spi_addr = 0;
|
||||
|
||||
/*
|
||||
* Word address and cached word used in mapped mode
|
||||
*/
|
||||
uint32_t spi_word_addr = 0;
|
||||
union {
|
||||
uint32_t spi_word;
|
||||
uint8_t spi_bytes[4];
|
||||
} spi_u;
|
||||
|
||||
/* Offset (in bytes) of the data stream in the SPI flash: 1 MB.
 * Parenthesized so that the macro behaves as a single value in any
 * expression (e.g. a % ADDR_OFFSET). */
#define ADDR_OFFSET (1024*1024)
|
||||
|
||||
/*
|
||||
* Restarts reading from the beginning of the stream.
|
||||
*/
|
||||
void spi_reset() {
    /* Next byte to read = first byte of the stream. */
    spi_addr = ADDR_OFFSET;
    /* No word address can match this value: it invalidates the cached
     * word, so the next read fetches it again. */
    spi_word_addr = (uint32_t)(-1);
}
|
||||
|
||||
|
||||
#ifdef __linux__
|
||||
|
||||
FILE* f = NULL;
|
||||
|
||||
/**
|
||||
* Reads one byte of data from the file (emulates read_spi_byte() when running on desktop)
|
||||
*/
|
||||
uint8_t next_spi_byte() {
|
||||
uint8_t result;
|
||||
if(f == NULL) {
|
||||
f = fopen("../../../FIRMWARE/EXAMPLES/DATA/scene1.dat","rb");
|
||||
if(f == NULL) {
|
||||
printf("Could not open data file\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
if(spi_word_addr != spi_addr >> 2) {
|
||||
spi_word_addr = spi_addr >> 2;
|
||||
fseek(f, spi_word_addr*4-ADDR_OFFSET, SEEK_SET);
|
||||
fread(&(spi_u.spi_word), 4, 1, f);
|
||||
}
|
||||
result = spi_u.spi_bytes[spi_addr&3];
|
||||
++spi_addr;
|
||||
return (uint8_t)(result);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
|
||||
# define SPI_FLASH_BASE ((uint32_t*)(1 << 23))
|
||||
|
||||
/**
|
||||
* Reads one byte from the SPI flash, using the mapped SPI flash interface.
|
||||
*/
|
||||
static inline uint8_t next_spi_byte() {
|
||||
uint8_t result;
|
||||
if(spi_word_addr != spi_addr >> 2) {
|
||||
spi_word_addr = spi_addr >> 2;
|
||||
spi_u.spi_word = SPI_FLASH_BASE[spi_word_addr];
|
||||
}
|
||||
result = spi_u.spi_bytes[spi_addr&3];
|
||||
++spi_addr;
|
||||
return (uint8_t)(result);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Reads the next 16-bit word of the stream.
 * In the ST-NICCC file, words are stored in big endian format
 * (see DATA/scene_description.txt): most significant byte first.
 */
static inline uint16_t next_spi_word() {
    uint16_t word = (uint16_t)((uint16_t)next_spi_byte() << 8);
    word |= (uint16_t)next_spi_byte();
    return word;
}
|
||||
|
||||
/*
|
||||
* The colormap, encoded in such a way that it
|
||||
* can be directly sent as ANSI color codes.
|
||||
*/
|
||||
int cmap[16];
|
||||
|
||||
/*
 * Current frame's vertices coordinates (if frame is indexed),
 * mapped to screen coordinates with map_x() / map_y().
 * Vertices are referenced by an 8-bit index read from the stream,
 * hence 256 entries: any index value stays within the tables.
 */
uint8_t X[256];
uint8_t Y[256];
|
||||
|
||||
/*
|
||||
* Current polygon vertices, as expected
|
||||
* by GL_fillpoly():
|
||||
* xi = poly[2*i], yi = poly[2*i+1]
|
||||
*/
|
||||
int poly[30];
|
||||
|
||||
/*
|
||||
* Masks for frame flags.
|
||||
*/
|
||||
#define CLEAR_BIT 1
|
||||
#define PALETTE_BIT 2
|
||||
#define INDEXED_BIT 4
|
||||
|
||||
/*
|
||||
* Reads a frame's polygonal description from
|
||||
* SPI flash and rasterizes the polygons using
|
||||
* FemtoGL.
|
||||
* returns 0 if last frame.
|
||||
* See DATA/scene_description.txt for the
|
||||
* ST-NICCC file format.
|
||||
* See DATA/test_ST_NICCC.c for an example
|
||||
* program.
|
||||
*/
|
||||
int read_frame() RV32_FASTCODE;
|
||||
int read_frame() {
|
||||
uint8_t frame_flags = next_spi_byte();
|
||||
|
||||
// Update palette data.
|
||||
if(frame_flags & PALETTE_BIT) {
|
||||
uint16_t colors = next_spi_word();
|
||||
for(int b=15; b>=0; --b) {
|
||||
if(colors & (1 << b)) {
|
||||
int rgb = next_spi_word();
|
||||
|
||||
// Get the three 3-bits per component R,G,B
|
||||
int b3 = (rgb & 0x007);
|
||||
int g3 = (rgb & 0x070) >> 4;
|
||||
int r3 = (rgb & 0x700) >> 8;
|
||||
|
||||
// Re-encode them as ANSI 8-bits color
|
||||
b3 = b3 * 6 / 8;
|
||||
g3 = g3 * 6 / 8;
|
||||
r3 = r3 * 6 / 8;
|
||||
cmap[15-b] = 16 + b3 + 6*(g3 + 6*r3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(frame_flags & CLEAR_BIT) {
|
||||
// GL_clear();
|
||||
}
|
||||
|
||||
// Update vertices
|
||||
if(frame_flags & INDEXED_BIT) {
|
||||
uint8_t nb_vertices = next_spi_byte();
|
||||
for(int v=0; v<nb_vertices; ++v) {
|
||||
X[v] = map_x(next_spi_byte());
|
||||
Y[v] = map_y(next_spi_byte());
|
||||
}
|
||||
}
|
||||
|
||||
// Draw frame's polygons
|
||||
for(;;) {
|
||||
uint8_t poly_desc = next_spi_byte();
|
||||
|
||||
// Special polygon codes (end of frame,
|
||||
// seek next block, end of stream)
|
||||
|
||||
if(poly_desc == 0xff) {
|
||||
break; // end of frame
|
||||
}
|
||||
if(poly_desc == 0xfe) {
|
||||
// Go to next 64kb block
|
||||
spi_addr -= ADDR_OFFSET;
|
||||
spi_addr &= ~65535;
|
||||
spi_addr += 65536;
|
||||
spi_addr += ADDR_OFFSET;
|
||||
return 1;
|
||||
}
|
||||
if(poly_desc == 0xfd) {
|
||||
return 0; // end of stream
|
||||
}
|
||||
|
||||
uint8_t nvrtx = poly_desc & 15;
|
||||
uint8_t poly_col = poly_desc >> 4;
|
||||
for(int i=0; i<nvrtx; ++i) {
|
||||
if(frame_flags & INDEXED_BIT) {
|
||||
uint8_t index = next_spi_byte();
|
||||
poly[2*i] = X[index];
|
||||
poly[2*i+1] = Y[index];
|
||||
} else {
|
||||
poly[2*i] = map_x(next_spi_byte());
|
||||
poly[2*i+1] = map_y(next_spi_byte());
|
||||
}
|
||||
}
|
||||
GL_setcolor(cmap[poly_col]);
|
||||
GL_fillpoly(nvrtx,poly);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Plays the polygon stream in an endless loop: clears the screen once,
 * then repeatedly rewinds the stream and renders its frames until
 * read_frame() reports the end of the stream. On the device, the frame
 * counter is sent to the LEDs; on Linux, a 20 ms pause follows each frame.
 * With WITH_WIREFRAME, every other pass is rendered in wireframe mode.
 */
int main() {
    // printf("\x1B[?25l"); // hide cursor

#ifndef __linux__
    IO_OUT(IO_LEDS,15);
#endif
    printf("starting\n");

#ifdef WITH_WIREFRAME
    wireframe = 0;
#endif
    int frame = 0;
    GL_clear();

    for(;;) {
        spi_reset();
        for(frame = 0; read_frame(); ++frame) {
#ifdef WITH_WIREFRAME
            if(wireframe) {
                GL_clear();
            }
#endif
#ifdef __linux__
            usleep(20000);
#else
            IO_OUT(IO_LEDS,frame);
#endif
        }
#ifdef WITH_WIREFRAME
        wireframe = !wireframe;
#endif
    }
}
|
21
FIRMWARE/blinker.S
Normal file
21
FIRMWARE/blinker.S
Normal file
@@ -0,0 +1,21 @@
|
||||
# Simple blinker
#
# Endlessly alternates two values on the LEDs register, calling the
# external routine `wait` after each write.
# The LEDs register is addressed relative to gp: gp is presumably set to
# the IO base address by the startup code -- TODO confirm in start.S.

.equ IO_BASE, 0x400000      # IO base address (not referenced in this file)
.equ IO_LEDS, 4             # offset of the LEDs register, relative to gp

.section .text

.globl main

main:
.L0:

        li   t0, 5              # first pattern:  0b0101
        sw   t0, IO_LEDS(gp)
        call wait               # delay routine, defined elsewhere
        li   t0, 10             # second pattern: 0b1010 (the complement on 4 bits)
        sw   t0, IO_LEDS(gp)
        call wait
        j .L0                   # loop forever, main never returns
|
||||
|
||||
|
13
FIRMWARE/bram.ld
Normal file
13
FIRMWARE/bram.ld
Normal file
@@ -0,0 +1,13 @@
|
||||
MEMORY
|
||||
{
|
||||
BRAM (RWX) : ORIGIN = 0x0000, LENGTH = 0x1800 /* 6kB RAM */
|
||||
}
|
||||
SECTIONS
|
||||
{
|
||||
everything :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
start.o (.text)
|
||||
*(.*)
|
||||
} >BRAM
|
||||
}
|
7
FIRMWARE/dhrystones.c
Normal file
7
FIRMWARE/dhrystones.c
Normal file
@@ -0,0 +1,7 @@
|
||||
#define RISCV
|
||||
#define TIME
|
||||
#define USE_MYSTDLIB
|
||||
|
||||
#include "DHRYSTONE/dhry_1.c"
|
||||
#include "DHRYSTONE/dhry_2.c"
|
||||
#include "DHRYSTONE/stubs.c"
|
182
FIRMWARE/donut.c
Normal file
182
FIRMWARE/donut.c
Normal file
@@ -0,0 +1,182 @@
|
||||
// donut.c by Andy Sloane (@a1k0n)
|
||||
// https://gist.github.com/a1k0n/8ea6516b4946ab36348fb61703dc3194
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
|
||||
#define WITH_RV32M
|
||||
|
||||
#define debug(...)
|
||||
//#define debug printf
|
||||
|
||||
// torus radii and distance from camera
|
||||
// these are pretty baked-in to other constants now, so it probably won't work
|
||||
// if you change them too much.
|
||||
const int dz = 5, r1 = 1, r2 = 2;
|
||||
|
||||
// "Magic circle algorithm"? DDA? I've seen this formulation in a few places;
|
||||
// first in Hal Chamberlain's Musical Applications of Microprocessors, but not
|
||||
// sure what to call it, or how to justify it theoretically. It seems to
|
||||
// correctly rotate around a point "near" the origin, without losing magnitude
|
||||
// over long periods of time, as long as there are enough bits of precision in x
|
||||
// and y. I use 14 bits here.
|
||||
#define R(s,x,y) x-=(y>>s); y+=(x>>s)
|
||||
|
||||
// CORDIC algorithm to find magnitude of |x,y| by rotating the x,y vector onto
|
||||
// the x axis. This also brings vector (x2,y2) along for the ride, and writes
|
||||
// back to x2 -- this is used to rotate the lighting vector from the normal of
|
||||
// the torus surface towards the camera, and thus determine the lighting amount.
|
||||
// We only need to keep one of the two lighting normal coordinates.
|
||||
/*
 * Rotates (x,y) onto the positive x axis with 8 CORDIC iterations and
 * returns the (rescaled) resulting x, i.e. an approximation of the length
 * of (x,y). The vector (*x2_, y2) undergoes the same sequence of rotations;
 * its rescaled x component is written back to *x2_.
 */
int length_cordic(int16_t x, int16_t y, int16_t *x2_, int16_t y2) {
    int x2 = *x2_;

    // start in right half-plane (mirror both vectors if needed)
    if (x < 0) {
        x = -x;
        x2 = -x2;
    }

    for (int shift = 0; shift < 8; shift++) {
        // x components before this step: the y updates must use them
        const int prev_x = x;
        const int prev_x2 = x2;
        if (y < 0) {
            // below the axis: rotate counter-clockwise
            x -= y >> shift;
            y += prev_x >> shift;
            x2 -= y2 >> shift;
            y2 += prev_x2 >> shift;
        } else {
            // on or above the axis: rotate clockwise
            x += y >> shift;
            y -= prev_x >> shift;
            x2 += y2 >> shift;
            y2 -= prev_x2 >> shift;
        }
    }

    // multiply by 0.625 (1/2 + 1/8) as a cheap approximation to the 0.607
    // scaling factor introduced by this algorithm
    // (see https://en.wikipedia.org/wiki/CORDIC)
    *x2_ = (x2 >> 1) + (x2 >> 3);
    return (x >> 1) + (x >> 3);
}
|
||||
|
||||
/*
 * Renders a rotating torus as ASCII art, forever.
 * Each frame prints 23 rows of 79 characters: for every character, a point
 * is moved step by step along a view direction, the step being the distance
 * estimate d computed with two length_cordic() calls, until the point is
 * close enough to the surface (a shading character is printed) or has
 * travelled too far (a space is printed). All computations use integers.
 */
void main() {
    // high-precision rotation directions, sines and cosines and their products
    int16_t sB = 0, cB = 16384;
    int16_t sA = 11583, cA = 11583;
    int16_t sAsB = 0, cAsB = 0;
    int16_t sAcB = 11583, cAcB = 11583;

    // One iteration per frame.
    for (;;) {
        int x1_16 = cAcB << 2;   // not used below

        // yes this is a multiply but dz is 5 so it's (sb + (sb<<2)) >> 6 effectively
        int p0x = dz * sB >> 6;
        int p0y = dz * sAcB >> 6;
        int p0z = -dz * cAcB >> 6;

        const int r1i = r1*256;
        const int r2i = r2*256;

        int niters = 0;     // marching steps done for this frame
        int nnormals = 0;   // characters that hit the surface in this frame
        int16_t yincC = (cA >> 6) + (cA >> 5);      // 12*cA >> 8;
        int16_t yincS = (sA >> 6) + (sA >> 5);      // 12*sA >> 8;
        int16_t xincX = (cB >> 7) + (cB >> 6);      // 6*cB >> 8;
        int16_t xincY = (sAsB >> 7) + (sAsB >> 6);  // 6*sAsB >> 8;
        int16_t xincZ = (cAsB >> 7) + (cAsB >> 6);  // 6*cAsB >> 8;
        int16_t ycA = -((cA >> 1) + (cA >> 4));     // -12 * yinc1 = -9*cA >> 4;
        int16_t ysA = -((sA >> 1) + (sA >> 4));     // -12 * yinc2 = -9*sA >> 4;
        //int dmin = INT_MAX, dmax = -INT_MAX;

        // One iteration per text row.
        for (int j = 0; j < 23; j++, ycA += yincC, ysA += yincS) {
            int xsAsB = (sAsB >> 4) - sAsB;  // -40*xincY
            int xcAsB = (cAsB >> 4) - cAsB;  // -40*xincZ;

            // View direction of the first character of the row; it is
            // incremented for each following character.
            int16_t vxi14 = (cB >> 4) - cB - sB;  // -40*xincX - sB;
            int16_t vyi14 = ycA - xsAsB - sAcB;
            int16_t vzi14 = ysA + xcAsB + cAcB;

            // One iteration per character of the row.
            for (int i = 0; i < 79; i++, vxi14 += xincX, vyi14 -= xincY, vzi14 += xincZ) {
                int t = 512; // (256 * dz) - r2i - r1i;

                int16_t px = p0x + (vxi14 >> 5); // assuming t = 512, t*vxi>>8 == vxi<<1
                int16_t py = p0y + (vyi14 >> 5);
                int16_t pz = p0z + (vzi14 >> 5);
                debug("pxyz (%+4d,%+4d,%+4d)\n", px, py, pz);
                int16_t lx0 = sB >> 2;
                int16_t ly0 = sAcB - cA >> 2;
                int16_t lz0 = -cAcB - sA >> 2;

                // Marching loop: move (px,py,pz) along the view direction
                // until it hits the surface or goes too far.
                for (;;) {
                    int t0, t1, t2, d;
                    int16_t lx = lx0, ly = ly0, lz = lz0;
                    debug("[%2d,%2d] (px, py) = (%d, %d), (lx, ly) = (%d, %d) -> ", j, i, px, py, lx, ly);
                    t0 = length_cordic(px, py, &lx, ly);
                    debug("t0=%d (lx', ly') = (%d, %d)\n", t0, lx, ly);
                    t1 = t0 - r2i;
                    t2 = length_cordic(pz, t1, &lz, lx);
                    d = t2 - r1i;   // distance estimate, used as step length
                    t += d;

                    if (t > 8*256) {
                        // travelled too far: nothing to show here
                        putchar(' ');
                        break;
                    } else if (d < 2) {
                        // close enough to the surface: pick one of the 12
                        // shading characters from lz (index clamped to 0..11)
                        int N = lz >> 9;
                        putchar(".,-~:;!*=#$@"[N > 0 ? N < 12 ? N : 11 : 0]);
                        nnormals++;
                        break;
                    }
                    // todo: shift and add version of this


                    /*
                      if (d < dmin) dmin = d;
                      if (d > dmax) dmax = d;
                    */

                    // Advance the point by d along the view direction.
#ifdef WITH_RV32M
                    px += d*vxi14 >> 14;
                    py += d*vyi14 >> 14;
                    pz += d*vzi14 >> 14;
#else
                    {
                        // 11x1.14 fixed point 3x parallel multiply
                        // only 16 bit registers needed; starts from highest bit to lowest
                        // d is about 2..1100, so 11 bits are sufficient
                        int16_t dx = 0, dy = 0, dz = 0;  // note: this dz hides the global dz in this scope
                        int16_t a = vxi14, b = vyi14, c = vzi14;
                        while (d) {
                            if (d&1024) {
                                dx += a;
                                dy += b;
                                dz += c;
                            }
                            d = (d&1023) << 1;
                            a >>= 1;
                            b >>= 1;
                            c >>= 1;
                        }
                        // we already shifted down 10 bits, so get the last four
                        px += dx >> 4;
                        py += dy >> 4;
                        pz += dz >> 4;
                    }
#endif
                    niters++;
                }
            }
            puts("");
        }
        printf("%d iterations %d lit pixels\x1b[K", niters, nnormals);
        // fflush(stdout);

        // rotate sines, cosines, and products thereof
        // this animates the torus rotation about two axes
        R(5, cA, sA);
        R(5, cAsB, sAsB);
        R(5, cAcB, sAcB);
        R(6, cB, sB);
        R(6, cAcB, cAsB);
        R(6, sAcB, sAsB);

        // usleep(15000);
        // carriage return, then move the cursor 23 lines up for the next frame
        printf("\r\x1b[23A");
    }
}
|
427
FIRMWARE/donut2.c
Normal file
427
FIRMWARE/donut2.c
Normal file
@@ -0,0 +1,427 @@
|
||||
// donut.c by Andy Sloane (@a1k0n)
|
||||
// https://gist.github.com/a1k0n/8ea6516b4946ab36348fb61703dc3194
|
||||
// Bruno Levy: added ANSI "pseudo-graphics", and RISC-V statistics
|
||||
|
||||
#define CPU_NAME "TordBoyau ULX3S" // Name of your CPU and FPGA board
|
||||
#define MHZ 95 // Frequency (without a timer we cannot guess)
|
||||
#define USE_MUL // Define if you support RV32M
|
||||
|
||||
// #define PRECISE // Define for a more accurate result (but it costs a bit)
|
||||
#define START_FRAMES 20 // Number of frames without display
|
||||
// (for accurate CPI/MIPS measurements)
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
|
||||
// 0 15 31 47 63 79 96 112 127 143 159 175 191 207 223 240 255
|
||||
|
||||
const char* colormap[34] = {
|
||||
"0",
|
||||
"8;5;232",
|
||||
"8;5;233",
|
||||
"8;5;234",
|
||||
"8;5;235",
|
||||
"8;5;236",
|
||||
"8;5;237",
|
||||
"8;5;238",
|
||||
"8;5;239",
|
||||
"8;5;240",
|
||||
"8;5;241",
|
||||
"8;5;242",
|
||||
"8;5;243",
|
||||
"8;5;244",
|
||||
"8;5;245",
|
||||
"8;5;246",
|
||||
"8;5;247",
|
||||
"8;5;248",
|
||||
"8;5;249",
|
||||
"8;5;250",
|
||||
"8;5;251",
|
||||
"8;5;252",
|
||||
"8;5;253",
|
||||
"8;5;254",
|
||||
"8;5;255",
|
||||
"7",
|
||||
"8;5;16",
|
||||
"8;5;17",
|
||||
"8;5;18",
|
||||
"8;5;19",
|
||||
"8;5;20",
|
||||
"8;5;21",
|
||||
"8;5;22",
|
||||
"8;5;23",
|
||||
};
|
||||
|
||||
int prev_color1=0;
|
||||
int prev_color2=0;
|
||||
|
||||
char scanline[80];
|
||||
|
||||
#ifdef __linux__

/* Desktop build: no counter is read, the cycle count is reported as 0. */
uint64_t my_rdcycle() {
    return 0;
}

/* Desktop build: no counter is read, the instruction count is reported as 0. */
uint64_t my_rdinstret() {
    return 0;
}

#else

/*
 * Returns the 64-bit cycle counter.
 * The counter is read as two 32-bit halves. The high half is read before
 * and after the low half, and the whole sequence is repeated until both
 * reads of the high half agree, so that a carry from the low half into the
 * high half between the reads cannot produce an inconsistent value.
 */
uint64_t my_rdcycle() {
    uint32_t a0,a1,t0;
    do {
        __asm__ __volatile__ ("rdcycleh %0" : "=r" (a1));
        __asm__ __volatile__ ("rdcycle %0"  : "=r" (a0));
        __asm__ __volatile__ ("rdcycleh %0" : "=r" (t0));
    } while(t0 != a1);

    return ((uint64_t)a1 << 32) | a0;
}

/*
 * Returns the 64-bit retired-instructions counter.
 * Same reading sequence as my_rdcycle(): retried until the two reads of
 * the high half agree.
 */
uint64_t my_rdinstret() {
    uint32_t a0,a1,t0;
    do {
        __asm__ __volatile__ ("rdinstreth %0" : "=r" (a1));
        __asm__ __volatile__ ("rdinstret %0"  : "=r" (a0));
        __asm__ __volatile__ ("rdinstreth %0" : "=r" (t0));
    } while(t0 != a1);

    return ((uint64_t)a1 << 32) | a0;
}

#endif
|
||||
|
||||
uint64_t stats_cycles_init = 0;
|
||||
uint64_t stats_instructions_init = 0;
|
||||
uint64_t stats_cycles = 0;
|
||||
uint64_t stats_instructions = 0;
|
||||
int stats_CPI_times_1000 = 0;
|
||||
|
||||
/* Starts a measurement: records the current cycle and instruction counters,
 * used as reference by stats_end(). */
void stats_start() {
    stats_cycles_init = my_rdcycle();
    stats_instructions_init = my_rdinstret();
}
|
||||
|
||||
void stats_end() {
|
||||
stats_cycles = my_rdcycle() - stats_cycles_init;
|
||||
stats_instructions = my_rdinstret() - stats_instructions_init;
|
||||
if(stats_cycles==0) {
|
||||
stats_cycles++;
|
||||
}
|
||||
if(stats_instructions==0) {
|
||||
stats_instructions++;
|
||||
}
|
||||
stats_CPI_times_1000 = (int)((stats_cycles * 1000)/stats_instructions);
|
||||
}
|
||||
|
||||
// Print "fixed point" number (integer/1000)
|
||||
/*
 * Prints kx/1000 as a decimal number with exactly three digits after the
 * point (e.g. 1042 is printed as 1.042). Only plain "%d" conversions are
 * used; the leading zeros of the fractional part are printed by hand.
 */
static void printk(uint64_t kx) {
    const int whole  = (int)(kx / 1000);
    const int millis = (int)(kx % 1000);

    printf("%d.",whole);
    /* One padding zero for each missing leading digit. */
    for(int limit = 100; limit >= 10 && millis < limit; limit /= 10) {
        printf("0");
    }
    printf("%d",millis);
}
|
||||
|
||||
/*
 * Sends the escape sequence that selects both terminal colors:
 * colormap[bg] completes the "4..." (background) code and colormap[fg]
 * the "3..." (foreground) code.
 */
static inline void setcolors(int fg, int bg) {
    printf("\033[4%s;3%sm",colormap[bg],colormap[fg]);
}
|
||||
|
||||
static inline void setpixel(int x, int y, int color) {
|
||||
if(y&1){
|
||||
int color1 = scanline[x];
|
||||
int color2 = color;
|
||||
if(color1 == color2) {
|
||||
if(prev_color1 == color1) {
|
||||
putchar(' ');
|
||||
} else {
|
||||
printf("\033[4%sm ",colormap[color1]);
|
||||
prev_color1 = color1;
|
||||
}
|
||||
} else {
|
||||
if(prev_color1 != color1 && prev_color2 != color2) {
|
||||
printf("\033[4%s;3%sm",colormap[color1],colormap[color2]);
|
||||
prev_color1 = color1;
|
||||
prev_color2 = color2;
|
||||
} else if(prev_color1 != color1) {
|
||||
printf("\033[4%sm",colormap[color1]);
|
||||
prev_color1 = color1;
|
||||
} else if(prev_color2 != color2) {
|
||||
printf("\033[3%sm",colormap[color2]);
|
||||
prev_color2 = color2;
|
||||
}
|
||||
printf("\u2583");
|
||||
}
|
||||
} else {
|
||||
scanline[x] = color;
|
||||
}
|
||||
}
|
||||
|
||||
#define debug(...)
|
||||
//#define debug printf
|
||||
|
||||
// torus radii and distance from camera
|
||||
// these are pretty baked-in to other constants now, so it probably won't work
|
||||
// if you change them too much.
|
||||
const int dz = 5, r1 = 1, r2 = 2;
|
||||
|
||||
// "Magic circle algorithm"? DDA? I've seen this formulation in a few places;
|
||||
// first in Hal Chamberlain's Musical Applications of Microprocessors, but not
|
||||
// sure what to call it, or how to justify it theoretically. It seems to
|
||||
// correctly rotate around a point "near" the origin, without losing magnitude
|
||||
// over long periods of time, as long as there are enough bits of precision in x
|
||||
// and y. I use 14 bits here.
|
||||
#define R(s,x,y) x-=(y>>s); y+=(x>>s)
|
||||
|
||||
// CORDIC algorithm to find magnitude of |x,y| by rotating the x,y vector onto
|
||||
// the x axis. This also brings vector (x2,y2) along for the ride, and writes
|
||||
// back to x2 -- this is used to rotate the lighting vector from the normal of
|
||||
// the torus surface towards the camera, and thus determine the lighting amount.
|
||||
// We only need to keep one of the two lighting normal coordinates.
|
||||
// Returns an approximation of the length of (x,y), obtained by rotating
// the vector onto the x axis with NIT shift-and-add CORDIC steps.
// The auxiliary vector (*x2_, y2) undergoes the same rotations; its
// rescaled x component is written back through x2_.
int length_cordic(int16_t x, int16_t y, int16_t *x2_, int16_t y2) {

#ifdef PRECISE
#define NIT 10
#else
#define NIT 5
#endif

  int aux_x = *x2_;
  if (x < 0) { // start in right half-plane
    x = -x;
    aux_x = -aux_x;
  }

  for (int step = 0; step < NIT; ++step) {
    // shifted copies of the values as they are at the start of this step
    const int xs  = x >> step;
    const int ys  = y >> step;
    const int ax  = aux_x >> step;
    const int ay  = y2 >> step;
    if (y < 0) {
      x     -= ys;
      y     += xs;
      aux_x -= ay;
      y2    += ax;
    } else {
      x     += ys;
      y     -= xs;
      aux_x += ay;
      y2    -= ax;
    }
  }

  // multiply by 0.625 (= 1/2 + 1/8) as a cheap approximation of the 0.607
  // scaling factor needed to undo the gain introduced by this algorithm
  // (see https://en.wikipedia.org/wiki/CORDIC)
  *x2_ = (aux_x >> 1) + (aux_x >> 3);
  int length = (x >> 1) + (x >> 3);
#ifdef PRECISE
  length -= (x >> 6); // get nearer to 0.607 [Inigo Quilez]
#endif
  return length;
}
|
||||
|
||||
int main() {
|
||||
|
||||
printf( "\033[48;5;16m" // set background color black
|
||||
"\033[38;5;15m" // set foreground color white
|
||||
"\033[H" // home
|
||||
"\033[?25l" // hide cursor
|
||||
"\033[2J"); // clear screen
|
||||
|
||||
int frame = 0;
|
||||
|
||||
// high-precision rotation directions, sines and cosines and their products
|
||||
int16_t sB = 0, cB = 16384;
|
||||
int16_t sA = 11583, cA = 11583;
|
||||
int16_t sAsB = 0, cAsB = 0;
|
||||
int16_t sAcB = 11583, cAcB = 11583;
|
||||
|
||||
int accurate_CPI_x_1000;
|
||||
int accurate_MIPS_x_1000;
|
||||
int CPI_x_1000;
|
||||
|
||||
stats_start();
|
||||
|
||||
for (;;) {
|
||||
|
||||
int display_on = (frame > START_FRAMES);
|
||||
if(display_on) {
|
||||
stats_start();
|
||||
}
|
||||
|
||||
int x1_16 = cAcB << 2;
|
||||
|
||||
// yes this is a multiply but dz is 5 so it's (sb + (sb<<2)) >> 6 effectively
|
||||
int p0x = dz * sB >> 6;
|
||||
int p0y = dz * sAcB >> 6;
|
||||
int p0z = -dz * cAcB >> 6;
|
||||
|
||||
const int r1i = r1*256;
|
||||
const int r2i = r2*256;
|
||||
|
||||
int niters = 0;
|
||||
int nnormals = 0;
|
||||
int16_t yincC = (cA >> 6) + (cA >> 5); // 12*cA >> 8;
|
||||
int16_t yincS = (sA >> 6) + (sA >> 5); // 12*sA >> 8;
|
||||
int16_t xincX = (cB >> 7) + (cB >> 6); // 6*cB >> 8;
|
||||
int16_t xincY = (sAsB >> 7) + (sAsB >> 6); // 6*sAsB >> 8;
|
||||
int16_t xincZ = (cAsB >> 7) + (cAsB >> 6); // 6*cAsB >> 8;
|
||||
int16_t ycA = -((cA >> 1) + (cA >> 4)); // -12 * yinc1 = -9*cA >> 4;
|
||||
int16_t ysA = -((sA >> 1) + (sA >> 4)); // -12 * yinc2 = -9*sA >> 4;
|
||||
//int dmin = INT_MAX, dmax = -INT_MAX;
|
||||
|
||||
int xsAsB = (sAsB >> 4) - sAsB; // -40*xincY
|
||||
int xcAsB = (cAsB >> 4) - cAsB; // -40*xincZ;
|
||||
|
||||
|
||||
for (int j = 0; j < 46; j++, ycA += yincC>>1, ysA += yincS>>1) {
|
||||
|
||||
int16_t vxi14 = (cB >> 4) - cB - sB; // -40*xincX - sB;
|
||||
int16_t vyi14 = ycA - xsAsB - sAcB;
|
||||
int16_t vzi14 = ysA + xcAsB + cAcB;
|
||||
|
||||
for (int i = 0; i < 79; i++, vxi14 += xincX, vyi14 -= xincY, vzi14 += xincZ) {
|
||||
int t = 512; // (256 * dz) - r2i - r1i;
|
||||
|
||||
int16_t px = p0x + (vxi14 >> 5); // assuming t = 512, t*vxi>>8 == vxi<<1
|
||||
int16_t py = p0y + (vyi14 >> 5);
|
||||
int16_t pz = p0z + (vzi14 >> 5);
|
||||
debug("pxyz (%+4d,%+4d,%+4d)\n", px, py, pz);
|
||||
int16_t lx0 = sB >> 2;
|
||||
int16_t ly0 = sAcB - cA >> 2;
|
||||
int16_t lz0 = -cAcB - sA >> 2;
|
||||
for (;;) {
|
||||
int t0, t1, t2, d;
|
||||
int16_t lx = lx0, ly = ly0, lz = lz0;
|
||||
debug("[%2d,%2d] (px, py) = (%d, %d), (lx, ly) = (%d, %d) -> ", j, i, px, py, lx, ly);
|
||||
t0 = length_cordic(px, py, &lx, ly);
|
||||
debug("t0=%d (lx', ly') = (%d, %d)\n", t0, lx, ly);
|
||||
t1 = t0 - r2i;
|
||||
t2 = length_cordic(pz, t1, &lz, lx);
|
||||
d = t2 - r1i;
|
||||
t += d;
|
||||
|
||||
if (t > 8*256) {
|
||||
// putchar(' ');
|
||||
int N = (((j-frame)>>3)^(((i+frame)>>3)))&1;
|
||||
if(display_on) setpixel(i,j,(N<<2)+26);
|
||||
break;
|
||||
} else if (d < 2) {
|
||||
int N = lz >> 8;
|
||||
// putchar(".,-~:;!*=#$@"[N > 0 ? N < 12 ? N : 11 : 0]);
|
||||
N = N > 0 ? N < 26 ? N : 25 : 0;
|
||||
if(display_on) setpixel(i,j,N);
|
||||
nnormals++;
|
||||
break;
|
||||
}
|
||||
// todo: shift and add version of this
|
||||
|
||||
/*
|
||||
if (d < dmin) dmin = d;
|
||||
if (d > dmax) dmax = d;
|
||||
*/
|
||||
|
||||
#ifdef USE_MUL
|
||||
px += d*vxi14 >> 14;
|
||||
py += d*vyi14 >> 14;
|
||||
pz += d*vzi14 >> 14;
|
||||
#else
|
||||
{
|
||||
// 11x1.14 fixed point 3x parallel multiply
|
||||
// only 16 bit registers needed; starts from highest bit to lowest
|
||||
// d is about 2..1100, so 11 bits are sufficient
|
||||
int16_t dx = 0, dy = 0, dz = 0;
|
||||
int16_t a = vxi14, b = vyi14, c = vzi14;
|
||||
while (d) {
|
||||
if (d&1024) {
|
||||
dx += a;
|
||||
dy += b;
|
||||
dz += c;
|
||||
}
|
||||
d = (d&1023) << 1;
|
||||
a >>= 1;
|
||||
b >>= 1;
|
||||
c >>= 1;
|
||||
}
|
||||
// we already shifted down 10 bits, so get the last four
|
||||
px += dx >> 4;
|
||||
py += dy >> 4;
|
||||
pz += dz >> 4;
|
||||
}
|
||||
#endif
|
||||
niters++;
|
||||
}
|
||||
}
|
||||
if(display_on && (j&1)) puts("");
|
||||
}
|
||||
if(display_on) printf("\033[0m"); // reset colors
|
||||
|
||||
stats_end();
|
||||
|
||||
if(frame == START_FRAMES) {
|
||||
accurate_CPI_x_1000 = stats_CPI_times_1000;
|
||||
accurate_MIPS_x_1000 = (MHZ * 1000000) / accurate_CPI_x_1000;
|
||||
}
|
||||
|
||||
CPI_x_1000 = stats_CPI_times_1000;
|
||||
|
||||
uint64_t FPS_num = (uint64_t)(MHZ) * 1000000 * 1000;
|
||||
uint64_t FPS_denom = stats_cycles;
|
||||
int FPSx1000 = (int)(FPS_num / FPS_denom);
|
||||
|
||||
setcolors(25,33);
|
||||
#ifdef USE_MUL
|
||||
printf("%s RV32IM %dMHz ", CPU_NAME, MHZ);
|
||||
#else
|
||||
printf("%s RV32I %dMHz ", CPU_NAME, MHZ);
|
||||
#endif
|
||||
|
||||
setcolors(25,0);
|
||||
printf(" "); printk(FPSx1000); printf(" FPS ");
|
||||
setcolors(0,25);
|
||||
printf(" "); printk(CPI_x_1000);
|
||||
printf(" ("); printk(accurate_CPI_x_1000); printf(") CPI ");
|
||||
setcolors(25,0);
|
||||
printf(" "); printk(accurate_MIPS_x_1000); printf(" MIPS");
|
||||
/*
|
||||
setcolors(0,25);
|
||||
printf(" %d iterations ", niters);
|
||||
setcolors(0,25);
|
||||
printf(" %d lit pixels ", nnormals);
|
||||
*/
|
||||
setcolors(25,0);
|
||||
printf("\x1b[K");
|
||||
|
||||
#ifdef __linux__
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
// rotate sines, cosines, and products thereof
|
||||
// this animates the torus rotation about two axes
|
||||
R(5, cA, sA);
|
||||
R(5, cAsB, sAsB);
|
||||
R(5, cAcB, sAcB);
|
||||
R(6, cB, sB);
|
||||
R(6, cAcB, cAsB);
|
||||
R(6, sAcB, sAsB);
|
||||
|
||||
#ifdef __linux__
|
||||
usleep(15000);
|
||||
#endif
|
||||
printf("\r\x1b[23A");
|
||||
++frame;
|
||||
prev_color1=-1;
|
||||
prev_color2=-1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
11
FIRMWARE/errno.c
Normal file
11
FIRMWARE/errno.c
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Sometimes __errno is not linked, here is a dummy replacement.
|
||||
// Note that __errno is a function that returns a pointer to the
|
||||
// actual __errno (this is for multithreading). Made me bang my
|
||||
// head to the wall (and made tinyraytracer crash because powf()
|
||||
// was *calling* __errno).
|
||||
|
||||
// Dummy replacement for the C library's __errno(): hands out the
// address of a single static int that plays the role of errno.
int* __errno() {
  static int errno_storage = 0;
  int* slot = &errno_storage;
  return slot;
}
|
27
FIRMWARE/hello.S
Normal file
27
FIRMWARE/hello.S
Normal file
@@ -0,0 +1,27 @@
|
||||
# Hello world !
|
||||
|
||||
.section .text
|
||||
.globl main
|
||||
|
||||
# main: prints the hello string over and over, never returns.
main:
        la      a0, hello               # a0 = address of the string
        call    putstring
        j       main                    # repeat forever
|
||||
|
||||
#-----------------------------------------------------------------------
# putstring: sends a NUL-terminated string, one byte at a time, through
#            putchar.
# In:    a0 = address of the string
# Uses:  s0 (saved/restored) as the string cursor, so the cursor
#        survives `call putchar` whatever temporaries putchar clobbers
#        (the original kept it in t2, a caller-saved register).
# Stack: 16 bytes, keeping sp 16-byte aligned as the standard RISC-V
#        calling convention requires.
#-----------------------------------------------------------------------
putstring:
        addi    sp, sp, -16             # frame: ra + s0, 16-byte aligned
        sw      ra, 12(sp)              # non-leaf: ra must be preserved
        sw      s0, 8(sp)
        mv      s0, a0                  # s0 = cursor into the string
.Lputstring_next:
        lbu     a0, 0(s0)               # a0 = *cursor
        beqz    a0, .Lputstring_done    # stop at the terminating NUL
        call    putchar
        addi    s0, s0, 1               # cursor++
        j       .Lputstring_next
.Lputstring_done:
        lw      s0, 8(sp)
        lw      ra, 12(sp)
        addi    sp, sp, 16
        ret
|
||||
|
||||
.section .data
|
||||
hello:
|
||||
.asciz "Hello, world !\n"
|
113
FIRMWARE/humanshader.c
Normal file
113
FIRMWARE/humanshader.c
Normal file
@@ -0,0 +1,113 @@
|
||||
// C version of humanshader
|
||||
// See https://humanshader.com/
|
||||
// (using a computer is clearly not as fun, but it is interesting to have
|
||||
// a small not too computationally expensive raytracing program that
|
||||
// can run on small softcores for FPGAs).
|
||||
// Using the 16-bits version with no divide from here: https://www.shadertoy.com/view/XflXDs
|
||||
|
||||
#define GL_width 71
|
||||
#define GL_height 40
|
||||
#include "GL_tty.h"
|
||||
|
||||
// Integer-only "human shader": computes the color of pixel (x,y) and
// stores it through r_out / g_out / b_out. The pixel is classified as
// sphere, ground or sky from its offset (u,v) to the point (36,18);
// red and blue are computed per region, clamped to 255, and green is
// derived from them.
void human_shader(
    int x, int y, uint8_t* r_out, uint8_t* g_out, uint8_t* b_out
) {
    //-------------------------
    // Section A (2 MUL, 3 ADD)
    //-------------------------
    const int u  = x - 36;
    const int v  = 18 - y;
    const int uu = u * u;
    const int vv = v * v;
    const int h  = uu + vv;          // squared distance to (36,18)

    int red, blue;

    if (h < 200) {
        //-------------------------------------
        // Section B, Sphere (4/7 MUL, 5/9 ADD)
        //-------------------------------------
        red  = 420;
        blue = 520;

        const int t = 5200 + (h << 3);
        const int p = (t * u) >> 7;
        const int q = (t * v) >> 7;

        // bounce light
        const int bounce = 18 + (((p * 5 - q * 13)) >> 9);
        if (bounce > 0) {
            red += bounce * bounce;
        }

        // sky light / ambient occlusion
        const int occlusion = q + 900;
        red  = (red  * occlusion) >> 12;
        blue = (blue * occlusion) >> 12;

        // sun/key light
        if (p > -q) {
            const int key = (p + q) >> 3;
            red  += key;
            blue += key;
        }
    } else if (v < 0) {
        //-------------------------------------
        // Section C, Ground (5/9 MUL, 6/9 ADD)
        //-------------------------------------
        red  = 150 + (v << 1);
        blue = 50;

        const int p = h + (vv << 3);
        const int c = 240 * (-v) - p;

        // sky light / ambient occlusion
        if (c > 1200) {
            int occlusion = (25 * c) >> 3;
            occlusion = (c * (7840 - occlusion) >> 9) - 8560;
            red  = (red  * occlusion) >> 10;
            blue = (blue * occlusion) >> 10;
        }

        // sun/key light with soft shadow
        const int r = c + u * v;
        const int d = 3200 - h - (r << 1);
        if (d > 0) {
            red += d;
        }
    } else {
        //------------------------------
        // Section D, Sky (1 MUL, 2 ADD)
        //------------------------------
        const int c = x + (y << 2);
        red  = 132 + c;
        blue = 192 + c;
    }

    //-------------------------
    // Section E (3 MUL, 1 ADD)
    //-------------------------
    if (red  > 255) red  = 255;
    if (blue > 255) blue = 255;
    const int green = (red * 11 + 5 * blue) >> 4;

    *r_out = (uint8_t)red;
    *g_out = (uint8_t)green;
    *b_out = (uint8_t)blue;
}
|
||||
|
||||
// Entry point: runs human_shader over a GL_width x GL_height image
// through the GL_tty.h helpers, between GL_init() and GL_terminate().
// NOTE(review): GL_scan_RGB presumably calls the shader once per pixel
// and displays the result - confirm in GL_tty.h.
int main() {
    GL_init();
    GL_scan_RGB(GL_width, GL_height, human_shader);
    GL_terminate();
    return 0;
}
|
10
FIRMWARE/io.h
Normal file
10
FIRMWARE/io.h
Normal file
@@ -0,0 +1,10 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#define IO_BASE 0x400000
|
||||
#define IO_LEDS 4
|
||||
#define IO_UART_DAT 8
|
||||
#define IO_UART_CNTL 16
|
||||
|
||||
// Memory-mapped IO accessors: `port` is a byte offset from IO_BASE.
// Arguments and whole expansions are parenthesized so the macros stay
// correct when used with compound arguments or inside larger expressions.
#define IO_IN(port)       (*(volatile uint32_t*)(IO_BASE + (port)))
#define IO_OUT(port,val)  (*(volatile uint32_t*)(IO_BASE + (port)) = (val))
|
||||
|
99
FIRMWARE/mandel_C.c
Normal file
99
FIRMWARE/mandel_C.c
Normal file
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
Computes and displays the Mandelbrot set on the terminal (ANSI colors).
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#else
|
||||
#include "io.h"
|
||||
#endif
|
||||
|
||||
// Image size, in characters
#define W 46
#define H 46

// Fixed-point setup: coordinates are scaled by mandel_mul = 2^mandel_shift.
// Every expression macro is fully parenthesized so that it can be used
// inside larger expressions (e.g. 2*dx, 1000/dx) without precedence traps.
#define mandel_shift 10
#define mandel_mul (1 << mandel_shift)
#define xmin (-2*mandel_mul)
#define ymax (2*mandel_mul)
#define ymin (-2*mandel_mul)
#define xmax (2*mandel_mul)
#define dx ((xmax-xmin)/W)   // horizontal step: one per column (W, not H)
#define dy ((ymax-ymin)/H)   // vertical step: one per row
#define norm_max (4 << mandel_shift)
|
||||
|
||||
|
||||
#define ANSIRGB(R,G,B) "\033[48;2;" #R ";" #G ";" #B "m "
|
||||
|
||||
|
||||
const char* colormap[21] = {
|
||||
ANSIRGB( 0, 0, 0),
|
||||
ANSIRGB( 0, 0, 40),
|
||||
ANSIRGB( 0, 0, 80),
|
||||
ANSIRGB( 0, 0,120),
|
||||
ANSIRGB( 0, 0,160),
|
||||
ANSIRGB( 0, 0,200),
|
||||
ANSIRGB( 0, 0,240),
|
||||
|
||||
ANSIRGB( 0, 0, 0),
|
||||
ANSIRGB( 0, 40, 0),
|
||||
ANSIRGB( 0, 80, 0),
|
||||
ANSIRGB( 0,120, 0),
|
||||
ANSIRGB( 0,160, 0),
|
||||
ANSIRGB( 0,200, 0),
|
||||
ANSIRGB( 0,240, 0),
|
||||
|
||||
ANSIRGB( 0, 0, 0),
|
||||
ANSIRGB( 40, 0, 0),
|
||||
ANSIRGB( 80, 0, 0),
|
||||
ANSIRGB( 120, 0, 0),
|
||||
ANSIRGB( 160, 0, 0),
|
||||
ANSIRGB( 200, 0, 0),
|
||||
ANSIRGB( 240, 0, 0)
|
||||
};
|
||||
|
||||
int main() {
|
||||
int frame=0;
|
||||
for(;;) {
|
||||
IO_OUT(IO_LEDS,frame);
|
||||
int last_color = -1;
|
||||
printf("\033[H");
|
||||
int Ci = ymin;
|
||||
for(int Y=0; Y<H; ++Y) {
|
||||
int Cr = xmin;
|
||||
for(int X=0; X<W; ++X) {
|
||||
int Zr = Cr;
|
||||
int Zi = Ci;
|
||||
int iter = 20;
|
||||
while(iter > 0) {
|
||||
int Zrr = (Zr * Zr) >> mandel_shift;
|
||||
int Zii = (Zi * Zi) >> mandel_shift;
|
||||
int Zri = (Zr * Zi) >> (mandel_shift - 1);
|
||||
Zr = Zrr - Zii + Cr;
|
||||
Zi = Zri + Ci;
|
||||
if(Zrr + Zii > norm_max) {
|
||||
break;
|
||||
}
|
||||
--iter;
|
||||
}
|
||||
int color = (iter+frame)%21;
|
||||
printf(color == last_color ? " " : colormap[color]);
|
||||
last_color = color;
|
||||
Cr += dx;
|
||||
}
|
||||
Ci += dy;
|
||||
printf("\033[49m\n");
|
||||
last_color = -1;
|
||||
}
|
||||
++frame;
|
||||
#ifdef __linux__
|
||||
usleep(100000);
|
||||
#endif
|
||||
// if(frame>4) break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
125
FIRMWARE/mandelbrot.S
Normal file
125
FIRMWARE/mandelbrot.S
Normal file
@@ -0,0 +1,125 @@
|
||||
# Computes and displays the Mandelbrot set on the terminal.
|
||||
# Needs NRV_IO_UART to be enabled.
|
||||
#
|
||||
# To access it, use:
|
||||
# miniterm.py --dtr=0 /dev/ttyUSB1 115200
|
||||
# or screen /dev/ttyUSB1 115200 (<ctrl> a \ to exit)
|
||||
|
||||
|
||||
# Base address of memory-mapped IO,
|
||||
# Loaded into gp at startup
|
||||
.equ IO_BASE, 0x400000
|
||||
|
||||
# IO-reg offsets. To read or write one of them,
|
||||
# use IO_XXX(gp)
|
||||
.equ IO_LEDS, 4
|
||||
.equ IO_UART_DAT, 8
|
||||
.equ IO_UART_CNTL, 16
|
||||
|
||||
.equ mandel_shift, 10
|
||||
.equ mandel_mul,(1 << mandel_shift)
|
||||
.equ xmin, -2*mandel_mul
|
||||
.equ xmax, 2*mandel_mul
|
||||
.equ ymin, -2*mandel_mul
|
||||
.equ ymax, 2*mandel_mul
|
||||
.equ dx, (xmax-xmin)/80
|
||||
.equ dy, (ymax-ymin)/80
|
||||
.equ norm_max,(4 << mandel_shift)
|
||||
|
||||
.section .text
|
||||
|
||||
# X,Y : s0,s1
|
||||
# Cr,Ci : s2,s3
|
||||
# Zr,Zi : s4,s5
|
||||
# Zrr,2Zri,Zii: s6,s7,s8
|
||||
# cnt: s10
|
||||
# 80 (number of rows and columns): s11
|
||||
|
||||
.globl main
|
||||
|
||||
#-----------------------------------------------------------------------
# main: draws the Mandelbrot set as 80x80 ASCII characters, forever.
# Register roles (all live across the calls to __mulsi3 / putchar):
#   s0,s1  = X,Y pixel counters        s2,s3 = Cr,Ci
#   s4,s5  = Zr,Zi                     s6,s7,s8 = Zrr, 2*Zri, Zii
#   s10    = remaining iterations      s11 = 80 (rows and columns)
# Never returns, so ra and the s-registers are not saved.
#-----------------------------------------------------------------------
main:
mandelstart:

        # LED pattern shown before each frame
        # NOTE(review): there is no `call wait` between the second "5" and
        # the following "10", so that "5" is overwritten at once - confirm
        # whether a wait was intended.
        li      t0, 5
        sw      t0, IO_LEDS(gp)
        call    wait
        li      t0, 10
        sw      t0, IO_LEDS(gp)
        call    wait
        li      t0, 5
        sw      t0, IO_LEDS(gp)
        li      t0, 10
        sw      t0, IO_LEDS(gp)
        call    wait
        li      t0, 0
        sw      t0, IO_LEDS(gp)

        li      s1, 0                   # Y <- 0
        li      s3, ymin                # Ci <- ymin
        li      s11, 80

loop_y: li      s0, 0                   # X <- 0
        li      s2, xmin                # Cr <- xmin

loop_x: mv      s4, s2                  # Z <- C
        mv      s5, s3

        li      s10, 9                  # iter <- 9

loop_Z: mv      a0, s4                  # Zrr <- (Zr*Zr) >> mandel_shift
        mv      a1, s4
        call    __mulsi3
        srli    s6, a0, mandel_shift
        mv      a0, s4                  # Zri <- (Zr*Zi) >> (mandel_shift-1)
        mv      a1, s5
        call    __mulsi3
        srai    s7, a0, mandel_shift-1
        mv      a0, s5                  # Zii <- (Zi*Zi) >> (mandel_shift)
        mv      a1, s5
        call    __mulsi3
        srli    s8, a0, mandel_shift
        sub     s4, s6, s8              # Zr <- Zrr - Zii + Cr
        add     s4, s4, s2
        add     s5, s7, s3              # Zi <- 2Zri + Ci

        add     s6, s6, s8              # if norm > norm max, exit loop
        li      s7, norm_max
        bgt     s6, s7, exit_Z

        addi    s10, s10, -1            # iter--, loop if non-zero
        bnez    s10, loop_Z
exit_Z:
        la      a0, colormap            # putchar(colormap[iter])
        add     a0, a0, s10
        lbu     a0, 0(a0)
        call    putchar

        addi    s0, s0, 1               # next column
        addi    s2, s2, dx
        bne     s0, s11, loop_x

        li      a0, 13                  # end of row: CR, LF
        call    putchar
        li      a0, 10
        call    putchar

        addi    s1, s1, 1               # next row
        addi    s3, s3, dy
        bne     s1, s11, loop_y

        li      t0, 15                  # frame done: all four LEDs on
        sw      t0, IO_LEDS(gp)

        # The original called putchar here without loading a0, relying on
        # a0 still holding 10 (LF) from the previous call. Load it
        # explicitly: a0 is not preserved across calls by the ABI.
        li      a0, 10
        call    putchar
        li      a0, 13
        call    putchar
        li      a0, 10
        call    putchar

        j       mandelstart
|
||||
|
||||
.section .data
|
||||
colormap:
|
||||
.ascii " .,:;ox%#@"
|
||||
|
27
FIRMWARE/memcpy.c
Normal file
27
FIRMWARE/memcpy.c
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#pragma GCC optimize ("no-tree-loop-distribute-patterns")
|
||||
|
||||
// Minimal memcpy: copies len bytes from src to dst and returns dst.
// When both pointers are 4-byte aligned, the bulk is copied one 32-bit
// word at a time; remaining bytes (or everything, if unaligned) are
// copied byte by byte. The regions must not overlap.
void* memcpy(void * dst, void const * src, size_t len) {
   uint8_t*       out = (uint8_t *) dst;
   uint8_t const* in  = (uint8_t const *) src;

   // uintptr_t (not uint32_t) so the alignment test does not truncate
   // pointers when this file is built for a 64-bit host.
   if ((((uintptr_t)src | (uintptr_t)dst) & 3) == 0) {
      uint32_t*       out32 = (uint32_t *) dst;
      uint32_t const* in32  = (uint32_t const *) src;
      while (len >= 4) {
         *out32++ = *in32++;
         len -= 4;
      }
      out = (uint8_t *) out32;
      in  = (uint8_t const *) in32;
   }

   // tail (or whole buffer when unaligned): byte copy
   while (len != 0) {
      *out++ = *in++;
      --len;
   }

   return dst;
}
|
26
FIRMWARE/notes.txt
Normal file
26
FIRMWARE/notes.txt
Normal file
@@ -0,0 +1,26 @@
|
||||
# https://blog.thea.codes/the-most-thoroughly-commented-linker-script/
|
||||
# https://interrupt.memfault.com/blog/how-to-write-linker-scripts-for-firmware
|
||||
|
||||
bin/riscv64-unknown-elf-as -march=rv32i -mabi=ilp32 -mno-relax mandelbrot_terminal.S -o mandelbrot_terminal.o
|
||||
riscv64-unknown-elf-ld mandelbrot_terminal.o -T baremetal.ld -m elf32lriscv -nostdlib -norelax
|
||||
/home/blevy/Programming/learn-fpga/FemtoRV/FIRMWARE/TOOLS/firmware_words a.elf -ram 6144 -hex a.hex
|
||||
|
||||
|
||||
FTDI 2232H
|
||||
|
||||
Rx >
|
||||
Tx <
|
||||
RTSn > Request to send
|
||||
CTSn < Clear to send
|
||||
DTRn > Data Terminal Ready
|
||||
DSRn < Data Set Ready
|
||||
DCDn > Data Carrier Detect
|
||||
|
||||
|
||||
#set_io DCDn 1
|
||||
#set_io DSRn 2
|
||||
#set_io DTRn 3
|
||||
#set_io CTSn 4
|
||||
#set_io RTSn 7
|
||||
set_io RS232_Tx_TTL 8
|
||||
set_io RS232_Rx_TTL 9
|
19
FIRMWARE/perf.S
Normal file
19
FIRMWARE/perf.S
Normal file
@@ -0,0 +1,19 @@
|
||||
.section .text
|
||||
.globl rdcycle
|
||||
.globl rdinstret
|
||||
|
||||
# uint64_t rdcycle(): returns the 64-bit cycle counter in a1:a0.
# The high word is read before and after the low word; the sequence is
# retried until both high reads agree (no carry in between).
# Clobbers t0.
rdcycle:
1:      rdcycleh a1                     # high word, first sample
        rdcycle  a0                     # low word
        rdcycleh t0                     # high word, second sample
        bne      a1, t0, 1b             # high word changed: retry
        ret
|
||||
|
||||
# uint64_t rdinstret(): returns the 64-bit retired-instruction counter
# in a1:a0, using the same high/low/high read-and-retry sequence.
# Clobbers t0.
rdinstret:
1:      rdinstreth a1                   # high word, first sample
        rdinstret  a0                   # low word
        rdinstreth t0                   # high word, second sample
        bne        a1, t0, 1b           # high word changed: retry
        ret
|
4
FIRMWARE/perf.h
Normal file
4
FIRMWARE/perf.h
Normal file
@@ -0,0 +1,4 @@
|
||||
#include <stdint.h>
|
||||
|
||||
/* 64-bit cycle counter (implemented in perf.S). */
extern uint64_t rdcycle(void);

/* 64-bit retired-instruction counter (implemented in perf.S). */
extern uint64_t rdinstret(void);
|
186
FIRMWARE/pi.c
Normal file
186
FIRMWARE/pi.c
Normal file
@@ -0,0 +1,186 @@
|
||||
/*
|
||||
* Computation of the n'th decimal digit of \pi with very little memory.
|
||||
* Written by Fabrice Bellard on January 8, 1997.
|
||||
*
|
||||
* We use a slightly modified version of the method described by Simon
|
||||
* Plouffe in "On the Computation of the n'th decimal digit of various
|
||||
* transcendental numbers" (November 1996). We have modified the algorithm
|
||||
* to get a running time of O(n^2) instead of O(n^3log(n)^3).
|
||||
*
|
||||
* This program uses mostly integer arithmetic. It may be slow on some
|
||||
 * hardware where integer multiplications and divisions must be done
|
||||
* by software. We have supposed that 'int' has a size of 32 bits. If
|
||||
* your compiler supports 'long long' integers of 64 bits, you may use
|
||||
* the integer version of 'mul_mod' (see HAS_LONG_LONG).
|
||||
*/
|
||||
|
||||
/* Adapted to FemtoRV32 (Bruno Levy Feb. 2021) */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
// #include "errno_fix.h"
|
||||
|
||||
|
||||
//#define RV32_FASTCODE __attribute((section(".fastcode")))
|
||||
#define RV32_FASTCODE
|
||||
|
||||
/* uncomment the following line to use 'long long' integers */
|
||||
#define HAS_LONG_LONG
|
||||
|
||||
#ifdef HAS_LONG_LONG
|
||||
#define mul_mod(a,b,m) (( (long long) (a) * (long long) (b) ) % (m))
|
||||
#else
|
||||
#define mul_mod(a,b,m) fmod( (double) a * (double) b, m)
|
||||
#endif
|
||||
|
||||
/* return the inverse of x mod y */
|
||||
int inv_mod(int x, int y) RV32_FASTCODE;
|
||||
int inv_mod(int x, int y)
{
    /* Extended Euclid on (x, y): the remainder pair (prev_rem, rem) is
       reduced while the matching coefficient pair (prev_coef, coef) is
       updated alongside. When rem reaches 0, prev_coef is the inverse
       of x modulo y, up to sign. */
    int rem = x, prev_rem = y;
    int coef = 1, prev_coef = 0;

    for (;;) {
        const int quot = prev_rem / rem;

        const int next_coef = prev_coef - quot * coef;
        prev_coef = coef;
        coef = next_coef;

        const int next_rem = prev_rem - quot * rem;
        prev_rem = rem;
        rem = next_rem;

        if (rem == 0)
            break;
    }

    /* bring the result into [0, y) */
    const int inverse = prev_coef % y;
    return (inverse < 0) ? y + inverse : inverse;
}
|
||||
|
||||
/* return (a^b) mod m */
|
||||
int pow_mod(int a, int b, int m) RV32_FASTCODE;
|
||||
int pow_mod(int a, int b, int m)
{
    /* Square-and-multiply: consume the exponent bit by bit, lowest bit
       first, squaring the base once per bit. */
    int result = 1;
    int base = a;

    for (;;) {
        if (b & 1)
            result = mul_mod(result, base, m);
        b >>= 1;
        if (b == 0)
            return result;
        base = mul_mod(base, base, m);
    }
}
|
||||
|
||||
/* return true if n is prime */
|
||||
int is_prime(int n) RV32_FASTCODE;
|
||||
int is_prime(int n)
{
    int i;

    /* 0, 1 and negative numbers are not prime; 2 is the only even prime */
    if (n < 2)
        return 0;
    if (n == 2)
        return 1;
    if ((n % 2) == 0)
        return 0;

    /* Trial division by odd numbers up to sqrt(n). The bound on i keeps
       i*i from overflowing a 32-bit int (46340*46340 < 2^31), which the
       rest of this file assumes 'int' to be. */
    for (i = 3; i <= 46340 && i * i <= n; i += 2)
        if ((n % i) == 0)
            return 0;
    return 1;
}
|
||||
|
||||
/* return the prime number immediately after n */
|
||||
int next_prime(int n) RV32_FASTCODE;
|
||||
int next_prime(int n)
{
    /* first candidate is n+1; advance until is_prime() accepts one */
    int candidate = n + 1;
    while (!is_prime(candidate))
        ++candidate;
    return candidate;
}
|
||||
|
||||
int digits(int n) RV32_FASTCODE;
|
||||
int digits(int n) {
    /* Returns nine decimal digits of pi starting at position n, packed
       into one int (the fractional sum scaled by 1e9). For every prime a
       from 3 to 2N, a partial sum is accumulated modulo av = a^vmax and
       its contribution s/av is added to the running fractional sum. */
    const int N = (int) ((n + 20) * log(10) / log(2));
    double sum = 0;

    for (int a = 3; a <= (2 * N); a = next_prime(a)) {

        /* av = a^vmax, largest power of a used for this prime */
        const int vmax = (int) (log(2 * N) / log(a));
        int av = 1;
        for (int e = 0; e < vmax; e++)
            av = av * a;

        int s = 0;
        int num = 1;
        int den = 1;
        int v = 0;      /* running exponent of a (den side minus num side) */
        int kq = 1;     /* counters that detect multiples of a without dividing */
        int kq2 = 1;

        for (int k = 1; k <= N; k++) {

            /* numerator factor k, with its powers of a stripped */
            int t = k;
            if (kq >= a) {
                do {
                    t = t / a;
                    v--;
                } while ((t % a) == 0);
                kq = 0;
            }
            kq++;
            num = mul_mod(num, t, av);

            /* denominator factor 2k-1, with its powers of a stripped */
            t = (2 * k - 1);
            if (kq2 >= a) {
                if (kq2 == a) {
                    do {
                        t = t / a;
                        v++;
                    } while ((t % a) == 0);
                }
                kq2 -= a;
            }
            den = mul_mod(den, t, av);
            kq2 += 2;

            if (v > 0) {
                t = inv_mod(den, av);
                t = mul_mod(t, num, av);
                t = mul_mod(t, k, av);
                for (int e = v; e < vmax; e++)
                    t = mul_mod(t, a, av);
                s += t;
                if (s >= av)
                    s -= av;
            }

        }

        const int shift = pow_mod(10, n - 1, av);
        s = mul_mod(s, shift, av);

        sum = fmod(sum + (double) s / (double) av, 1.0);
    }
    return (int) (sum * 1e9);
}
|
||||
|
||||
|
||||
/* Prints the first 45 decimals of pi, nine digits at a time. */
void main() {
    printf("\npi = 3.");
    for(int n=1; ;n+=9) {
        const int group = digits(n);
        /* digits() packs nine digits into an int, so a plain %d would
           drop leading zeros: pad the group to nine characters by hand. */
        for(int scale = 100000000; scale > 1 && group < scale; scale /= 10) {
            putchar('0');
        }
        printf("%d",group);
        if(n > 36) break;
    }
}
|
29
FIRMWARE/pipeline.ld
Normal file
29
FIRMWARE/pipeline.ld
Normal file
@@ -0,0 +1,29 @@
|
||||
/* Two separate 64kB regions: code in PROGROM, everything else in DATARAM. */
MEMORY {
   PROGROM (RX) : ORIGIN = 0x00000, LENGTH = 0x10000  /* 64kB ROM */
   DATARAM (RW) : ORIGIN = 0x10000, LENGTH = 0x10000  /* 64kB RAM */
}

SECTIONS {

    /* Code: the .text of start_pipeline.o is placed first, at the
       origin of PROGROM, followed by all other code. */
    .text : {
        . = ALIGN(4);
        start_pipeline.o (.text)
        *(.text*)
    } > PROGROM

    /* All data, read-only data and zero-initialized data are gathered
       in a single output section placed in DATARAM. */
    .data : {
        . = ALIGN(4);
        *(.data*)
        *(.sdata*)
        *(.rodata*)
        *(.srodata*)
        *(.bss*)
        *(.sbss*)

        *(COMMON)
        *(.eh_frame)
        *(.eh_frame_hdr)
        *(.init_array*)
        *(.gcc_except_table*)
    } > DATARAM
}
|
65
FIRMWARE/print.c
Normal file
65
FIRMWARE/print.c
Normal file
@@ -0,0 +1,65 @@
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
// Sends every character of the NUL-terminated string s to putchar().
void print_string(const char* s) {
   const char* cursor = s;
   while(*cursor) {
      putchar(*cursor);
      ++cursor;
   }
}
|
||||
|
||||
// Prints s followed by a newline; always returns 1.
int puts(const char* s) {
   for(const char* cursor = s; *cursor; ++cursor) {
      putchar(*cursor);
   }
   putchar('\n');
   return 1;
}
|
||||
|
||||
// Prints val in decimal through putchar(), with a leading '-' for
// negative values.
void print_dec(int val) {
   // 3 decimal digits per byte is always enough room for an int
   char digit[3 * sizeof(int) + 1];
   int count = 0;
   unsigned int magnitude;

   // Work on the unsigned magnitude: negating INT_MIN as an int
   // overflows (the original recursed forever on that value).
   if(val < 0) {
      putchar('-');
      magnitude = 0u - (unsigned int)val;
   } else {
      magnitude = (unsigned int)val;
   }

   // produce digits least-significant first (at least one, so 0 prints "0")
   do {
      digit[count++] = (char)(magnitude % 10);
      magnitude /= 10;
   } while(magnitude != 0);

   // ... and emit them most-significant first
   while(count > 0) {
      putchar('0' + digit[--count]);
   }
}
|
||||
|
||||
// Prints the nbdigits least-significant hexadecimal digits of val,
// most-significant digit first, in upper case.
// Defined before print_hex() so that it is declared when it is called.
void print_hex_digits(unsigned int val, int nbdigits) {
   const int val_bits = (int)(8 * sizeof(val));
   for (int i = (4*nbdigits)-4; i >= 0; i -= 4) {
      // digits requested beyond the width of val are zeros (shifting by
      // the full width or more would be undefined behavior)
      const unsigned int nibble = (i < val_bits) ? ((val >> i) % 16) : 0u;
      putchar("0123456789ABCDEF"[nibble]);
   }
}

// Prints val as 8 hexadecimal digits.
void print_hex(unsigned int val) {
   print_hex_digits(val, 8);
}
|
||||
|
||||
// Minimal printf: supports %s, %x (8 hex digits), %d and %c, without
// width or precision. Any other character following '%' is printed
// as is (so "%%" prints '%'). Always returns 0.
int printf(const char *fmt,...)
{
    va_list ap;

    va_start(ap, fmt);
    for(;*fmt;fmt++)
    {
        if(*fmt != '%')
        {
            putchar(*fmt);
            continue;
        }

        fmt++;
        if(*fmt == '\0')
        {
            // Lone '%' at the very end of the format: print it and stop,
            // instead of stepping past the string terminator.
            putchar('%');
            break;
        }

        switch(*fmt)
        {
            case 's': print_string(va_arg(ap,char *)); break;
            case 'x': print_hex(va_arg(ap,int));       break;
            case 'd': print_dec(va_arg(ap,int));       break;
            case 'c': putchar(va_arg(ap,int));         break;
            default:  putchar(*fmt);                   break;
        }
    }
    va_end(ap);

    return 0;
}
|
||||
|
22
FIRMWARE/putchar.S
Normal file
22
FIRMWARE/putchar.S
Normal file
@@ -0,0 +1,22 @@
|
||||
# Base address of memory-mapped IO,
|
||||
# Loaded into gp at startup
|
||||
.equ IO_BASE, 0x400000
|
||||
|
||||
# IO-reg offsets. To read or write one of them,
|
||||
# use IO_XXX(gp)
|
||||
.equ IO_LEDS, 4
|
||||
.equ IO_UART_DAT, 8
|
||||
.equ IO_UART_CNTL, 16
|
||||
|
||||
.section .text
|
||||
.globl putchar
|
||||
|
||||
# putchar: writes the character in a0 to the UART data register, then
# spins while bit 9 of the UART control register is set.
# NOTE(review): bit 9 is presumably the "transmitter busy" flag - confirm
# against the UART RTL.
# Clobbers t0, t1; a0 is left unchanged.
putchar:
        sw      a0, IO_UART_DAT(gp)     # send the character
        li      t0, 1<<9                # mask for control-register bit 9
1:      lw      t1, IO_UART_CNTL(gp)
        and     t1, t1, t0
        bnez    t1, 1b                  # still set: keep polling
        ret
|
||||
|
518
FIRMWARE/raystones.c
Normal file
518
FIRMWARE/raystones.c
Normal file
@@ -0,0 +1,518 @@
|
||||
/* A port of Dmitry Sokolov's tiny raytracer to C and to FemtoRV32 */
|
||||
/* Displays on the small OLED display and/or HDMI */
|
||||
/* Bruno Levy, 2020 */
|
||||
/* Original tinyraytracer: https://github.com/ssloy/tinyraytracer */
|
||||
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "perf.h"
|
||||
#include "io.h"
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
// Larger of x and y (y when they compare equal or unordered).
static inline float max(float x, float y) {
   if(x > y) {
      return x;
   }
   return y;
}

// Smaller of x and y (y when they compare equal or unordered).
static inline float min(float x, float y) {
   if(x < y) {
      return x;
   }
   return y;
}
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
// If you want to adapt tinyraytracer to your own platform, there are
|
||||
// mostly two macros and two functions to write:
|
||||
// graphics_width
|
||||
// graphics_height
|
||||
// graphics_init()
|
||||
// graphics_set_pixel()
|
||||
//
|
||||
// You can also write the following functions (or leave them empty if
|
||||
// you do not need them):
|
||||
// graphics_terminate()
|
||||
// stats_begin_frame()
|
||||
// stats_begin_pixel()
|
||||
// stats_end_pixel()
|
||||
// stats_end_frame()
|
||||
|
||||
|
||||
// Size of the screen
|
||||
// Replace with your own variables or values
|
||||
|
||||
// Benchmark
|
||||
// - graphics deactivated (else UART waiting loop gives
|
||||
// different results according to CPU freq / UART baud rate
|
||||
// ratio).
|
||||
// - smaller image size (for faster run in simulation)
|
||||
|
||||
static int graphics_width = 120;
|
||||
static int graphics_height = 60;
|
||||
|
||||
static int bench_run=0;
|
||||
|
||||
// Two pixels per character using UTF8 character set
|
||||
// (comment-out if terminal does not support it)
|
||||
#define graphics_double_lines
|
||||
|
||||
// Replace with your own stuff to initialize graphics
|
||||
// Prepares the terminal: black background, white foreground, cursor
// home, clear screen.
static inline void graphics_init() {
   static const char init_sequence[] =
      "\033[48;5;16m" // set background color black
      "\033[38;5;15m" // set foreground color white
      "\033[H"        // home
      "\033[2J";      // clear screen
   printf(init_sequence);
}
|
||||
|
||||
// Replace with your own stuff to terminate graphics or leave empty
|
||||
// Here I send <ctrl><D> to the UART, to exit the simulation in Verilator,
|
||||
// it is captured by special code in RTL/DEVICES/uart.v
|
||||
// Restores the default drawing colors (black background, white
// foreground) once rendering is finished.
static inline void graphics_terminate() {
   static const char reset_sequence[] =
      "\033[48;5;16m" // set background color black
      "\033[38;5;15m" // set foreground color white
      ;
   printf(reset_sequence);
}
|
||||
|
||||
// Replace with your own code.
|
||||
void graphics_set_pixel(int x, int y, float r, float g, float b) {
|
||||
r = max(0.0f, min(1.0f, r));
|
||||
g = max(0.0f, min(1.0f, g));
|
||||
b = max(0.0f, min(1.0f, b));
|
||||
uint8_t R = (uint8_t)(255.0f * r);
|
||||
uint8_t G = (uint8_t)(255.0f * g);
|
||||
uint8_t B = (uint8_t)(255.0f * b);
|
||||
// graphics output deactivated for bench run
|
||||
if(bench_run) {
|
||||
if(y & 1) {
|
||||
if(x == graphics_width-1) {
|
||||
printf("%d",y/2);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
#ifdef graphics_double_lines
|
||||
static uint8_t prev_R=0;
|
||||
static uint8_t prev_G=0;
|
||||
static uint8_t prev_B=0;
|
||||
if(y&1) {
|
||||
if((R == prev_R) && (G == prev_G) && (B == prev_B)) {
|
||||
printf("\033[48;2;%d;%d;%dm ",(int)R,(int)G,(int)B);
|
||||
} else {
|
||||
printf("\033[48;2;%d;%d;%dm",(int)prev_R,(int)prev_G,(int)prev_B);
|
||||
printf("\033[38;2;%d;%d;%dm",(int)R,(int)G,(int)B);
|
||||
// https://www.w3.org/TR/xml-entity-names/025.html
|
||||
// https://onlineunicodetools.com/convert-unicode-to-utf8
|
||||
printf("\xE2\x96\x83");
|
||||
}
|
||||
if(x == graphics_width-1) {
|
||||
printf("\033[38;2;0;0;0m");
|
||||
printf("\033[48;2;0;0;0m\n");
|
||||
}
|
||||
} else {
|
||||
prev_R = R;
|
||||
prev_G = G;
|
||||
prev_B = B;
|
||||
}
|
||||
#else
|
||||
printf("\033[48;2;%d;%d;%dm ",(int)R,(int)G,(int)B);
|
||||
if(x == graphics_width-1) {
|
||||
printf("\033[48;2;0;0;0m\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Begins statistics collection for current pixel.
// Leave empty if not needed.
// There are these two levels (pixel and frame) because on some
// femtorv32 cores (quark, tachyon), the clock tick counter does not
// have sufficient bits and will wrap during the time taken by
// rendering a frame (up to several minutes).
// Note: the return type is spelled out (void); implicit int is not
// valid since C99.
static inline void stats_begin_pixel() {
}
|
||||
|
||||
// Ends statistics collection for current pixel.
// Leave empty if not needed.
// Note: the return type is spelled out (void); implicit int is not
// valid since C99.
static inline void stats_end_pixel() {
}
|
||||
|
||||
// Prints a "fixed point" number, that is, kx/1000 with exactly three
// digits after the decimal point (for instance 1042 is printed as 1.042).
static void printk(uint64_t kx) {
    const int whole  = (int)(kx / 1000);
    const int millis = (int)(kx % 1000);
    printf("%d.",whole);
    // Left-pad the fractional part with zeroes up to three digits
    for(int threshold=100; threshold>=10; threshold/=10) {
        if(millis<threshold) {
            printf("0");
        }
    }
    printf("%d",millis);
}
|
||||
|
||||
static uint64_t instret_start;
|
||||
static uint64_t cycles_start;
|
||||
|
||||
// Begins statistics collection for current frame.
|
||||
// Leave emtpy if not needed.
|
||||
static inline stats_begin_frame() {
|
||||
instret_start = rdinstret();
|
||||
cycles_start = rdcycle();
|
||||
}
|
||||
|
||||
// Ends statistics collection for current frame
|
||||
// and displays result.
|
||||
// Leave emtpy if not needed.
|
||||
static inline stats_end_frame() {
|
||||
graphics_terminate();
|
||||
uint64_t instret = rdinstret() - instret_start;
|
||||
uint64_t cycles = rdcycle() - cycles_start ;
|
||||
uint64_t kCPI = cycles*1000/instret;
|
||||
uint64_t pixels = graphics_width * graphics_height;
|
||||
uint64_t kRAYSTONES = (pixels*1000000000)/cycles;
|
||||
printf(
|
||||
"\n%dx%d %s ",
|
||||
graphics_width,graphics_height,
|
||||
bench_run ?
|
||||
"no gfx output (measurement is accurate)" :
|
||||
"gfx output (measurement is NOT accurate)"
|
||||
);
|
||||
printf("CPI="); printk(kCPI); printf(" ");
|
||||
printf("RAYSTONES="); printk(kRAYSTONES);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
// Normally you will not need to modify anything beyond that point.
|
||||
/*******************************************************************/
|
||||
|
||||
typedef struct { float x,y,z; } vec3;
|
||||
typedef struct { float x,y,z,w; } vec4;
|
||||
|
||||
static inline vec3 make_vec3(float x, float y, float z) {
|
||||
vec3 V;
|
||||
V.x = x; V.y = y; V.z = z;
|
||||
return V;
|
||||
}
|
||||
|
||||
static inline vec4 make_vec4(float x, float y, float z, float w) {
|
||||
vec4 V;
|
||||
V.x = x; V.y = y; V.z = z; V.w = w;
|
||||
return V;
|
||||
}
|
||||
|
||||
static inline vec3 vec3_neg(vec3 V) {
|
||||
return make_vec3(-V.x, -V.y, -V.z);
|
||||
}
|
||||
|
||||
static inline vec3 vec3_add(vec3 U, vec3 V) {
|
||||
return make_vec3(U.x+V.x, U.y+V.y, U.z+V.z);
|
||||
}
|
||||
|
||||
static inline vec3 vec3_sub(vec3 U, vec3 V) {
|
||||
return make_vec3(U.x-V.x, U.y-V.y, U.z-V.z);
|
||||
}
|
||||
|
||||
static inline float vec3_dot(vec3 U, vec3 V) {
|
||||
return U.x*V.x+U.y*V.y+U.z*V.z;
|
||||
}
|
||||
|
||||
static inline vec3 vec3_scale(float s, vec3 U) {
|
||||
return make_vec3(s*U.x, s*U.y, s*U.z);
|
||||
}
|
||||
|
||||
static inline float vec3_length(vec3 U) {
|
||||
return sqrtf(U.x*U.x+U.y*U.y+U.z*U.z);
|
||||
}
|
||||
|
||||
static inline vec3 vec3_normalize(vec3 U) {
|
||||
return vec3_scale(1.0f/vec3_length(U),U);
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
typedef struct Light {
|
||||
vec3 position;
|
||||
float intensity;
|
||||
} Light;
|
||||
|
||||
Light make_Light(vec3 position, float intensity) {
|
||||
Light L;
|
||||
L.position = position;
|
||||
L.intensity = intensity;
|
||||
return L;
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
typedef struct {
|
||||
float refractive_index;
|
||||
vec4 albedo;
|
||||
vec3 diffuse_color;
|
||||
float specular_exponent;
|
||||
} Material;
|
||||
|
||||
Material make_Material(float r, vec4 a, vec3 color, float spec) {
|
||||
Material M;
|
||||
M.refractive_index = r;
|
||||
M.albedo = a;
|
||||
M.diffuse_color = color;
|
||||
M.specular_exponent = spec;
|
||||
return M;
|
||||
}
|
||||
|
||||
Material make_Material_default() {
|
||||
Material M;
|
||||
M.refractive_index = 1;
|
||||
M.albedo = make_vec4(1,0,0,0);
|
||||
M.diffuse_color = make_vec3(0,0,0);
|
||||
M.specular_exponent = 0;
|
||||
return M;
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
typedef struct {
|
||||
vec3 center;
|
||||
float radius;
|
||||
Material material;
|
||||
} Sphere;
|
||||
|
||||
Sphere make_Sphere(vec3 c, float r, Material M) {
|
||||
Sphere S;
|
||||
S.center = c;
|
||||
S.radius = r;
|
||||
S.material = M;
|
||||
return S;
|
||||
}
|
||||
|
||||
// Tests whether the ray (orig, dir) hits sphere S.
//   dir is used as a unit vector (callers pass normalized directions).
//   t0 receives the ray parameter of the nearest intersection that is
//      not behind the origin; it is left untouched when the ray misses
//      the sphere altogether, and may hold a negative value when 0 is
//      returned because the sphere is behind the origin.
// Returns 1 if there is an intersection, 0 otherwise.
BOOL Sphere_ray_intersect(Sphere* S, vec3 orig, vec3 dir, float* t0) {
    const vec3 to_center = vec3_sub(S->center, orig);
    const float tca = vec3_dot(to_center,dir);       // projection of the center on the ray
    const float d2 = vec3_dot(to_center,to_center) - tca*tca; // squared ray-center distance
    const float r2 = S->radius*S->radius;
    if (d2 > r2) {
        return 0;
    }
    const float thc = sqrtf(r2 - d2);
    const float t_near = tca - thc;
    const float t_far  = tca + thc;
    // Take the far intersection when the near one is behind the origin
    *t0 = (t_near < 0) ? t_far : t_near;
    return !(*t0 < 0);
}
|
||||
|
||||
// Returns I - 2(I.N)N, the mirror reflection of direction I
// about normal N.
vec3 reflect(vec3 I, vec3 N) {
    const float twice_projection = 2.f*vec3_dot(I,N);
    return vec3_sub(I, vec3_scale(twice_projection,N));
}
|
||||
|
||||
// Computes the refracted direction of I through a surface of normal N,
// using Snell's law.
//   eta_t : refractive index on the far side of the surface
//   eta_i : refractive index on the incoming side (callers pass 1)
vec3 refract(vec3 I, vec3 N, float eta_t, float eta_i /* =1.f */) {
    const float cosi = -max(-1.f, min(1.f, vec3_dot(I,N)));
    if (cosi<0) {
        // The ray comes from inside the object: flip the normal and
        // swap the air and the media
        return refract(I, vec3_neg(N), eta_i, eta_t);
    }
    const float eta = eta_i / eta_t;
    const float k = 1 - eta*eta*(1 - cosi*cosi);
    if (k<0) {
        // Total reflection, no ray to refract. A direction is returned
        // anyway, it has no physical meaning.
        return make_vec3(1,0,0);
    }
    return vec3_add(vec3_scale(eta,I),vec3_scale((eta*cosi - sqrtf(k)),N));
}
|
||||
|
||||
// Finds the nearest intersection between ray (orig, dir) and the scene,
// made of the nb_spheres spheres plus a checkerboard lying in the
// plane y = -4.
//   hit      : receives the intersection point
//   N        : receives the normal at the intersection point
//   material : receives the material of the sphere that was hit; when the
//              checkerboard is hit, only its diffuse_color is overwritten
// Returns nonzero if something nearer than distance 1000 was hit.
BOOL scene_intersect(
    vec3 orig, vec3 dir, Sphere* spheres, int nb_spheres,
    vec3* hit, vec3* N, Material* material
) {
    // Nearest sphere
    float spheres_dist = 1e30;
    for(int i=0; i<nb_spheres; ++i) {
        Sphere* sphere = &spheres[i];
        float dist_i;
        if(!Sphere_ray_intersect(sphere, orig, dir, &dist_i)) {
            continue;
        }
        if(!(dist_i < spheres_dist)) {
            continue;
        }
        spheres_dist = dist_i;
        *hit = vec3_add(orig,vec3_scale(dist_i,dir));
        *N = vec3_normalize(vec3_sub(*hit, sphere->center));
        *material = sphere->material;
    }

    // Checkerboard (skipped for rays nearly parallel to its plane)
    float checkerboard_dist = 1e30;
    if (fabs(dir.y)>1e-3) {
        float d = -(orig.y+4)/dir.y; // the checkerboard plane has equation y = -4
        vec3 pt = vec3_add(orig, vec3_scale(d,dir));
        BOOL on_board =
            d>0 && fabs(pt.x)<10 && pt.z<-10 && pt.z>-30 && d<spheres_dist;
        if (on_board) {
            checkerboard_dist = d;
            *hit = pt;
            *N = make_vec3(0,1,0);
            // Alternate the two tile colors
            int odd_tile = ((int)(.5*hit->x+1000) + (int)(.5*hit->z)) & 1;
            material->diffuse_color =
                odd_tile ? make_vec3(.3, .3, .3)
                         : make_vec3(.3, .2, .1);
        }
    }
    return min(spheres_dist, checkerboard_dist)<1000;
}
|
||||
|
||||
vec3 cast_ray(
|
||||
vec3 orig, vec3 dir, Sphere* spheres, int nb_spheres,
|
||||
Light* lights, int nb_lights, int depth /* =0 */
|
||||
) {
|
||||
vec3 point,N;
|
||||
Material material = make_Material_default();
|
||||
if (
|
||||
depth>2 ||
|
||||
!scene_intersect(orig, dir, spheres, nb_spheres, &point, &N, &material)
|
||||
) {
|
||||
float s = 0.5*(dir.y + 1.0);
|
||||
return vec3_add(
|
||||
vec3_scale(s,make_vec3(0.2, 0.7, 0.8)),
|
||||
vec3_scale(s,make_vec3(0.0, 0.0, 0.5))
|
||||
);
|
||||
}
|
||||
|
||||
vec3 reflect_dir=vec3_normalize(reflect(dir, N));
|
||||
vec3 refract_dir=vec3_normalize(refract(dir,N,material.refractive_index,1));
|
||||
|
||||
// offset the original point to avoid occlusion by the object itself
|
||||
vec3 reflect_orig =
|
||||
vec3_dot(reflect_dir,N) < 0
|
||||
? vec3_sub(point,vec3_scale(1e-3,N))
|
||||
: vec3_add(point,vec3_scale(1e-3,N));
|
||||
vec3 refract_orig =
|
||||
vec3_dot(refract_dir,N) < 0
|
||||
? vec3_sub(point,vec3_scale(1e-3,N))
|
||||
: vec3_add(point,vec3_scale(1e-3,N));
|
||||
vec3 reflect_color = cast_ray(
|
||||
reflect_orig, reflect_dir, spheres, nb_spheres,
|
||||
lights, nb_lights, depth + 1
|
||||
);
|
||||
vec3 refract_color = cast_ray(
|
||||
refract_orig, refract_dir, spheres, nb_spheres,
|
||||
lights, nb_lights, depth + 1
|
||||
);
|
||||
|
||||
float diffuse_light_intensity = 0, specular_light_intensity = 0;
|
||||
for (int i=0; i<nb_lights; i++) {
|
||||
vec3 light_dir = vec3_normalize(vec3_sub(lights[i].position,point));
|
||||
float light_distance = vec3_length(vec3_sub(lights[i].position,point));
|
||||
|
||||
vec3 shadow_orig =
|
||||
vec3_dot(light_dir,N) < 0
|
||||
? vec3_sub(point,vec3_scale(1e-3,N))
|
||||
: vec3_add(point,vec3_scale(1e-3,N)) ;
|
||||
// checking if the point lies in the shadow of the lights[i]
|
||||
vec3 shadow_pt, shadow_N;
|
||||
Material tmpmaterial;
|
||||
if (
|
||||
scene_intersect(
|
||||
shadow_orig, light_dir, spheres, nb_spheres,
|
||||
&shadow_pt, &shadow_N, &tmpmaterial
|
||||
) && (
|
||||
vec3_length(vec3_sub(shadow_pt,shadow_orig)) < light_distance
|
||||
)
|
||||
) continue ;
|
||||
|
||||
diffuse_light_intensity +=
|
||||
lights[i].intensity * max(0.f, vec3_dot(light_dir,N));
|
||||
|
||||
float abc = max(
|
||||
0.f, vec3_dot(vec3_neg(reflect(vec3_neg(light_dir), N)),dir)
|
||||
);
|
||||
float def = material.specular_exponent;
|
||||
if(abc > 0.0f && def > 0.0f) {
|
||||
specular_light_intensity += powf(abc,def)*lights[i].intensity;
|
||||
}
|
||||
}
|
||||
vec3 result = vec3_scale(
|
||||
diffuse_light_intensity * material.albedo.x, material.diffuse_color
|
||||
);
|
||||
result = vec3_add(
|
||||
result, vec3_scale(specular_light_intensity * material.albedo.y,
|
||||
make_vec3(1,1,1))
|
||||
);
|
||||
result = vec3_add(result, vec3_scale(material.albedo.z, reflect_color));
|
||||
result = vec3_add(result, vec3_scale(material.albedo.w, refract_color));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Renders pixel (i,j): casts the primary ray that goes from the origin
// through the center of the pixel and sends the resulting color to
// graphics_set_pixel(). The field of view is pi/3.
static inline void render_pixel(
    int i, int j, Sphere* spheres, int nb_spheres, Light* lights, int nb_lights
) {
    const float fov = M_PI/3.;
    stats_begin_pixel();
    float dir_x =  (i + 0.5) - graphics_width/2.;
    float dir_y = -(j + 0.5) + graphics_height/2.; // this flips the image.
    float dir_z = -graphics_height/(2.*tan(fov/2.));
    vec3 eye = make_vec3(0,0,0);
    vec3 view_dir = vec3_normalize(make_vec3(dir_x, dir_y, dir_z));
    vec3 C = cast_ray(
        eye, view_dir, spheres, nb_spheres, lights, nb_lights, 0
    );
    graphics_set_pixel(i,j,C.x,C.y,C.z);
    stats_end_pixel();
}
|
||||
|
||||
// Renders a complete frame, between stats_begin_frame() and
// stats_end_frame(). With graphics_double_lines, rows are processed two
// at a time: pixel (i,j) is immediately followed by pixel (i,j+1), the
// order graphics_set_pixel() relies on to pack two rows per character.
void render(Sphere* spheres, int nb_spheres, Light* lights, int nb_lights) {
#ifdef graphics_double_lines
    const int rows_per_pass = 2;
#else
    const int rows_per_pass = 1;
#endif
    stats_begin_frame();
    for (int j = 0; j<graphics_height; j+=rows_per_pass) {
        for (int i = 0; i<graphics_width; i++) {
            for (int dj = 0; dj<rows_per_pass; dj++) {
                render_pixel(i,j+dj,spheres,nb_spheres,lights,nb_lights);
            }
        }
    }
    stats_end_frame();
}
|
||||
|
||||
int nb_spheres = 4;
|
||||
Sphere spheres[4];
|
||||
|
||||
int nb_lights = 3;
|
||||
Light lights[3];
|
||||
|
||||
void init_scene() {
|
||||
Material ivory = make_Material(
|
||||
1.0, make_vec4(0.6, 0.3, 0.1, 0.0), make_vec3(0.4, 0.4, 0.3), 50.
|
||||
);
|
||||
Material glass = make_Material(
|
||||
1.5, make_vec4(0.0, 0.5, 0.1, 0.8), make_vec3(0.6, 0.7, 0.8), 125.
|
||||
);
|
||||
Material red_rubber = make_Material(
|
||||
1.0, make_vec4(0.9, 0.1, 0.0, 0.0), make_vec3(0.3, 0.1, 0.1), 10.
|
||||
);
|
||||
Material mirror = make_Material(
|
||||
1.0, make_vec4(0.0, 10.0, 0.8, 0.0), make_vec3(1.0, 1.0, 1.0), 142.
|
||||
);
|
||||
|
||||
spheres[0] = make_Sphere(make_vec3(-3, 0, -16), 2, ivory);
|
||||
spheres[1] = make_Sphere(make_vec3(-1.0, -1.5, -12), 2, glass);
|
||||
spheres[2] = make_Sphere(make_vec3( 1.5, -0.5, -18), 3, red_rubber);
|
||||
spheres[3] = make_Sphere(make_vec3( 7, 5, -18), 4, mirror);
|
||||
|
||||
lights[0] = make_Light(make_vec3(-20, 20, 20), 1.5);
|
||||
lights[1] = make_Light(make_vec3( 30, 50, -25), 1.8);
|
||||
lights[2] = make_Light(make_vec3( 30, 20, 30), 1.7);
|
||||
}
|
||||
|
||||
int main() {
|
||||
init_scene();
|
||||
|
||||
graphics_init();
|
||||
IO_OUT(IO_LEDS,5);
|
||||
bench_run = 1;
|
||||
graphics_width = 40;
|
||||
graphics_height = 20;
|
||||
printf("Running without graphic output (for accurate measurement)...\n");
|
||||
render(spheres, nb_spheres, lights, nb_lights);
|
||||
IO_OUT(IO_LEDS,10);
|
||||
|
||||
bench_run = 0;
|
||||
graphics_width = 120;
|
||||
graphics_height = 60;
|
||||
render(spheres, nb_spheres, lights, nb_lights);
|
||||
IO_OUT(IO_LEDS,15);
|
||||
graphics_terminate();
|
||||
|
||||
return 0;
|
||||
}
|
14
FIRMWARE/read_spiflash.c
Normal file
14
FIRMWARE/read_spiflash.c
Normal file
@@ -0,0 +1,14 @@
|
||||
#include "io.h"
|
||||
|
||||
#define SPI_FLASH_BASE ((char*)(1 << 23))
|
||||
|
||||
int main() {
|
||||
for(int i=0; i<16; ++i) {
|
||||
IO_OUT(IO_LEDS,i);
|
||||
int lo = (int)SPI_FLASH_BASE[2*i ];
|
||||
int hi = (int)SPI_FLASH_BASE[2*i+1];
|
||||
print_hex_digits((hi << 8) | lo,4); // print four hexadecimal digits
|
||||
printf(" ");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
135
FIRMWARE/riscv_logo.c
Normal file
135
FIRMWARE/riscv_logo.c
Normal file
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* FEMTORV32 - FEMTOSOC demo program:
|
||||
* Displaying a rotating RISCV logo
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
|
||||
/* The RISCV logo, with a tiny resolution
|
||||
* (remember, I only got 4Kb of RAM
|
||||
* on the IceStick !)
|
||||
*/
|
||||
unsigned char logo[16][16] = {
|
||||
{7,7,7,7,7,7,5,3,3,3,3,3,3,3,3,7},
|
||||
{7,7,7,7,7,7,7,5,3,3,3,3,3,3,3,7},
|
||||
{1,1,1,1,2,7,7,7,3,3,3,3,3,3,3,7},
|
||||
{0,0,0,0,0,1,7,7,5,3,3,3,3,3,3,7},
|
||||
{0,0,0,0,0,0,7,7,6,3,3,3,3,3,6,7},
|
||||
{0,0,0,0,0,0,7,7,5,3,3,3,3,4,7,7},
|
||||
{0,0,0,0,0,2,7,7,4,3,3,3,3,7,7,7},
|
||||
{0,2,2,2,7,7,7,6,3,3,3,3,6,7,7,7},
|
||||
{0,7,7,7,7,7,6,3,3,3,3,5,7,7,2,7},
|
||||
{0,1,7,7,7,4,3,3,3,3,3,7,7,7,0,7},
|
||||
{0,0,2,7,7,6,3,3,3,3,6,7,7,1,0,7},
|
||||
{0,0,0,2,7,7,5,3,3,5,7,7,2,0,0,7},
|
||||
{0,0,0,0,7,7,7,5,4,7,7,2,0,0,0,7},
|
||||
{0,0,0,0,0,7,7,7,7,7,7,0,0,0,0,7},
|
||||
{0,0,0,0,0,1,7,7,7,7,1,0,0,0,0,7},
|
||||
{7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* ANSI color codes:
|
||||
* https://stackoverflow.com/questions/4842424/list-of-ansi-color-escape-sequences
|
||||
*/
|
||||
|
||||
|
||||
#define ANSIRGB(R,G,B) "\033[48;2;" #R ";" #G ";" #B "m "
|
||||
|
||||
#define ANSICOL(C) "\033[" #C "m "
|
||||
|
||||
|
||||
/*
|
||||
* The colormap.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This ones corresponds to the official RISC-V logo,
|
||||
* but uses more bandwidth (full RGB ANSI codes)
|
||||
*/
|
||||
/*
|
||||
const char* cmap[8] = {
|
||||
ANSIRGB(040,051,116),
|
||||
ANSIRGB(123,128,155),
|
||||
ANSIRGB(170,172,188),
|
||||
ANSIRGB(249,177,021),
|
||||
ANSIRGB(249,190,101),
|
||||
ANSIRGB(249,199,130),
|
||||
ANSIRGB(252,216,176),
|
||||
ANSIRGB(250,251,248)
|
||||
};
|
||||
*/
|
||||
|
||||
/* more compact colormap */
|
||||
const char* cmap[8] = {
|
||||
ANSICOL(44),
|
||||
ANSICOL(104),
|
||||
ANSICOL(47),
|
||||
ANSICOL(102),
|
||||
ANSICOL(103),
|
||||
ANSICOL(103),
|
||||
ANSICOL(103),
|
||||
ANSICOL(107)
|
||||
};
|
||||
|
||||
/*
|
||||
* Generated by TOOLS/make_sintab.c
|
||||
*/
|
||||
|
||||
int sintab[64] = {
|
||||
0,25,49,74,97,120,142,162,181,197,212,225,236,244,251,254,
|
||||
256,254,251,244,236,225,212,197,181,162,142,120,97,74,49,25,
|
||||
0,-25,-49,-74,-97,-120,-142,-162,-181,-197,-212,-225,-236,-244,
|
||||
-251,-254,-256,-254,-251,-244,-236,-225,-212,-197,-181,-162,
|
||||
-142,-120,-97,-74,-49,-25
|
||||
};
|
||||
|
||||
|
||||
#define GL_width 40
|
||||
#define GL_height 40
|
||||
|
||||
|
||||
void main() {
|
||||
|
||||
int frame = 0;
|
||||
int last_col = -1;
|
||||
for(;;) {
|
||||
printf("\033[H"); // reset cursor position
|
||||
|
||||
int scaling = (sintab[frame&63]+400) << 1;
|
||||
int Ux = scaling*sintab[frame & 63];
|
||||
int Uy = scaling*sintab[(frame + 16) & 63];
|
||||
int Vx = -Uy;
|
||||
int Vy = Ux;
|
||||
|
||||
int X0 = -(GL_width/2)*(Ux+Vx);
|
||||
int Y0 = -(GL_height/2)*(Uy+Vy);
|
||||
|
||||
for(int y=0; y<GL_height; ++y) {
|
||||
int X = X0;
|
||||
int Y = Y0;
|
||||
for(int x=0; x<GL_width; ++x) {
|
||||
unsigned char col = logo[(Y >> 18)&15][(X >> 18)&15];
|
||||
printf(col == last_col ? " " : cmap[col]);
|
||||
last_col = col;
|
||||
X += Ux;
|
||||
Y += Uy;
|
||||
}
|
||||
printf("\033[49m\n"); // reset color to black and newline
|
||||
last_col = -1;
|
||||
X0 += Vx;
|
||||
Y0 += Vy;
|
||||
}
|
||||
++frame;
|
||||
#ifdef __linux__
|
||||
usleep(20000);
|
||||
#endif
|
||||
// if(frame > 20) break;
|
||||
}
|
||||
}
|
106
FIRMWARE/sieve.c
Normal file
106
FIRMWARE/sieve.c
Normal file
@@ -0,0 +1,106 @@
|
||||
// Taken from picorv32
|
||||
//
|
||||
// This is free and unencumbered software released into the public domain.
|
||||
//
|
||||
// Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
// distribute this software, either in source code form or as a compiled
|
||||
// binary, for any purpose, commercial or non-commercial, and by any
|
||||
// means.
|
||||
|
||||
// A simple Sieve of Eratosthenes
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
// Note: if this is changed, then checksum need
|
||||
// to be updated as well.
|
||||
#define BITMAP_SIZE 64
|
||||
|
||||
typedef int bool;
|
||||
|
||||
static uint32_t bitmap[BITMAP_SIZE/32];
|
||||
|
||||
static uint32_t hash;
|
||||
|
||||
// One step of the XOR version of the DJB2 hash:
// combines running hash a with new value b as (a*33) xor b,
// computed modulo 2^32.
static uint32_t mkhash(uint32_t a, uint32_t b)
{
    const uint32_t a_times_33 = a + (a << 5);
    return b ^ a_times_33;
}
|
||||
|
||||
static void bitmap_set(int idx)
|
||||
{
|
||||
bitmap[idx/32] |= 1 << (idx % 32);
|
||||
}
|
||||
|
||||
static bool bitmap_get(int idx)
|
||||
{
|
||||
return (bitmap[idx/32] & (1 << (idx % 32))) != 0;
|
||||
}
|
||||
|
||||
static void print_prime(int idx, int val)
|
||||
{
|
||||
if (idx < 10)
|
||||
printf(" ");
|
||||
printf("%d",idx);
|
||||
|
||||
if (idx / 10 == 1)
|
||||
goto force_th;
|
||||
switch (idx % 10) {
|
||||
case 1: printf("st"); break;
|
||||
case 2: printf("nd"); break;
|
||||
case 3: printf("rd"); break;
|
||||
force_th:
|
||||
default: printf("th"); break;
|
||||
}
|
||||
printf(" prime: %d\n",val);
|
||||
|
||||
hash = mkhash(hash, idx);
|
||||
hash = mkhash(hash, val);
|
||||
}
|
||||
|
||||
void sieve(void)
|
||||
{
|
||||
|
||||
int idx = 1;
|
||||
hash = 5381;
|
||||
print_prime(idx++, 2);
|
||||
for (int i = 0; i < BITMAP_SIZE; i++) {
|
||||
if (bitmap_get(i))
|
||||
continue;
|
||||
print_prime(idx++, 3+2*i);
|
||||
for (int j = 2*(3+2*i);; j += 3+2*i) {
|
||||
if (j%2 == 0)
|
||||
continue;
|
||||
int k = (j-3)/2;
|
||||
if (k >= BITMAP_SIZE)
|
||||
break;
|
||||
bitmap_set(k);
|
||||
}
|
||||
}
|
||||
|
||||
printf("checksum:\n %x",hash);
|
||||
|
||||
if (hash == 0x1772A48F) {
|
||||
printf(" OK\n");
|
||||
} else {
|
||||
printf(" ERROR\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the sieve forever, calling wait() ten times between two runs.
int main(void)
{
    while (1) {
        sieve();
        int pauses = 10;
        while (pauses-- > 0) {
            wait();
        }
    }

    return 0; // never reached
}
|
||||
|
10
FIRMWARE/spiflash0.ld
Normal file
10
FIRMWARE/spiflash0.ld
Normal file
@@ -0,0 +1,10 @@
|
||||
MEMORY {
|
||||
FLASH (RX) : ORIGIN = 0x00820000, LENGTH = 0x100000 /* 0x100000 bytes = 1 MB in flash */
|
||||
}
|
||||
SECTIONS {
|
||||
everything : { /* a single output section, entirely placed in FLASH */
|
||||
. = ALIGN(4);
|
||||
start.o (.text) /* startup code comes first */
|
||||
*(.*) /* then every remaining input section whose name starts with a dot */
|
||||
} >FLASH
|
||||
}
|
60
FIRMWARE/spiflash1.ld
Normal file
60
FIRMWARE/spiflash1.ld
Normal file
@@ -0,0 +1,60 @@
|
||||
/* Linker script for programs stored in SPI flash */
|
||||
/* Inspired from picorv32/picosoc/sections.lds */
|
||||
/* */
|
||||
/* text and rodata sections are sent to flash */
|
||||
/* bss sections are sent to BRAM */
|
||||
/* data sections are sent to BRAM and have */
|
||||
/* initialization data in flash. */
|
||||
/* AT keyword specifies LMA (Load Memory Address) */
|
||||
|
||||
MEMORY {
|
||||
FLASH (rx) : ORIGIN = 0x00820000, LENGTH = 0x100000 /* 0x100000 bytes = 1 MB in flash */
|
||||
RAM (rwx) : ORIGIN = 0x00000000, LENGTH = 0x1800 /* 6 kB in RAM */
|
||||
}
|
||||
|
||||
SECTIONS {

    /*
     * Initialized data.
     * The program executes knowing that the data is in the RAM (VMA),
     * but the initial values are stored in the FLASH, at _sidata (LMA,
     * specified by the AT keyword). The startup code (start_spiflash1.S)
     * copies the initial values from FLASH to RAM.
     */
    .data : AT ( _sidata ) {
        . = ALIGN(4);
        _sdata = .;      /* start of data in RAM; used by the startup code to initialise .data */
        _ram_start = .;  /* start of RAM (e.g., for garbage collector) */

        /* Initialized data */
        *(.data*)
        *(.sdata*)

        . = ALIGN(4);
        _edata = .;      /* end of data in RAM; used by the startup code to initialise .data */
    } > RAM

    /* The program code and read-only data go into FLASH */
    .text : {
        . = ALIGN(4);
        start_spiflash1.o(.text)  /* c runtime initialization (code) */
        *(.text*)                 /* .text* sections (code) */
        . = ALIGN(4);
        *(.rodata*)               /* .rodata* sections (constants, strings, etc.) */
        *(.srodata*)              /* .srodata* sections (small constants) */
        /*
         * The startup code copies .data word by word (lw/sw) starting
         * from _sidata, so the end of the read-only data is aligned on
         * a word boundary before defining it.
         */
        . = ALIGN(4);
        _etext = .;               /* end of code and read-only data */
        _sidata = _etext;         /* load address of .data; used by the startup code */
    } >FLASH

    /* Uninitialized data section, zeroed by the startup code */
    .bss : {
        . = ALIGN(4);
        _sbss = .;                /* bss start; used by the startup code */
        *(.bss*)
        *(.sbss*)
        *(COMMON)
        . = ALIGN(4);
        _ebss = .;                /* bss end; used by the startup code */
    } >RAM
}
|
80
FIRMWARE/spiflash2.ld
Normal file
80
FIRMWARE/spiflash2.ld
Normal file
@@ -0,0 +1,80 @@
|
||||
/* Linker script for programs stored in SPI flash */
|
||||
/* Inspired from picorv32/picosoc/sections.lds */
|
||||
/* */
|
||||
/* text and rodata sections are sent to flash */
|
||||
/* bss sections are sent to BRAM */
|
||||
/* data sections are sent to BRAM and have */
|
||||
/* initialization data in flash. */
|
||||
/* AT keyword specifies LMA (Load Memory Address) */
|
||||
|
||||
MEMORY {
|
||||
FLASH (rx) : ORIGIN = 0x00820000, LENGTH = 0x100000 /* 0x100000 bytes = 1 MB in flash */
|
||||
RAM (rwx) : ORIGIN = 0x00000000, LENGTH = 0x1800 /* 6 kB in RAM */
|
||||
}
|
||||
|
||||
SECTIONS {

    /*
     * Initialized data and fastcode.
     * The program executes knowing that this section is in the RAM (VMA),
     * but its initial content is stored in the FLASH, at _sidata (LMA,
     * specified by the AT keyword). The startup code (start_spiflash1.S)
     * copies it from FLASH to RAM.
     */
    .data_and_fastcode : AT ( _sidata ) {
        . = ALIGN(4);
        _sdata = .;      /* start of data in RAM; used by the startup code to initialise .data */
        _ram_start = .;  /* start of RAM (e.g., for garbage collector) */

        /* Initialized data */
        *(.data*)
        *(.sdata*)

        /* integer mul and div */
        */libgcc.a:muldi3.o(.text)
        */libgcc.a:div.o(.text)

        /* character output routines */
        putchar.o(.text)
        print.o(.text)

        /* functions with attribute((section(".fastcode"))) */
        *(.fastcode*)

        . = ALIGN(4);
        _edata = .;      /* end of data in RAM; used by the startup code to initialise .data */
    } > RAM

    /* The (non fastcode) program code and other data goes into FLASH */
    .text : {
        . = ALIGN(4);
        start_spiflash1.o(.text)  /* c runtime initialization (code) */

        /*
         * I do not understand why, but if I do not put this section, I got
         * an overlapping sections error with some programs (for instance pi.c
         * or C++ programs)
         */
        *(.eh_frame)
        *(.eh_frame_hdr)
        *(.init_array)
        *(.gcc_except_table*)

        *(.text*)                 /* .text* sections (code) */
        . = ALIGN(4);
        *(.rodata*)               /* .rodata* sections (constants, strings, etc.) */
        *(.srodata*)              /* .srodata* sections (small constants) */
        /*
         * The startup code copies the RAM section word by word (lw/sw)
         * starting from _sidata, so the end of the read-only data is
         * aligned on a word boundary before defining it.
         */
        . = ALIGN(4);
        _etext = .;               /* end of code and read-only data */
        _sidata = _etext;         /* load address of the RAM section; used by the startup code */
    } >FLASH

    /* Uninitialized data section, zeroed by the startup code */
    .bss : {
        . = ALIGN(4);
        _sbss = .;                /* bss start; used by the startup code */
        *(.bss*)
        *(.sbss*)
        *(COMMON)
        . = ALIGN(4);
        _ebss = .;                /* bss end; used by the startup code */
    } >RAM
}
|
87
FIRMWARE/spiflash3.ld
Normal file
87
FIRMWARE/spiflash3.ld
Normal file
@@ -0,0 +1,87 @@
|
||||
/* Linker script for programs stored in SPI flash */
|
||||
/* Inspired from picorv32/picosoc/sections.lds */
|
||||
/* */
|
||||
/* text and rodata sections are sent to flash */
|
||||
/* bss sections are sent to BRAM */
|
||||
/* data sections are sent to BRAM and have */
|
||||
/* initialization data in flash. */
|
||||
/* AT keyword specifies LMA (Load Memory Address) */
|
||||
|
||||
MEMORY {
|
||||
FLASH (rx) : ORIGIN = 0x00820000, LENGTH = 0x100000 /* 0x100000 bytes = 1 MB in flash */
|
||||
RAM (rwx) : ORIGIN = 0x00000000, LENGTH = 0x1800 /* 6 kB in RAM */
|
||||
}
|
||||
|
||||
SECTIONS {

    /*
     * Initialized data and fastcode.
     * The program executes knowing that this section is in the RAM (VMA),
     * but its initial content is stored in the FLASH, at _sidata (LMA,
     * specified by the AT keyword). The startup code (start_spiflash1.S)
     * copies it from FLASH to RAM.
     */
    .data_and_fastcode : AT ( _sidata ) {
        . = ALIGN(4);
        _sdata = .;      /* start of data in RAM; used by the startup code to initialise .data */
        _ram_start = .;  /* start of RAM (e.g., for garbage collector) */

        /* Initialized data */
        *(.data*)
        *(.sdata*)

        /* integer mul and div */
        */libgcc.a:muldi3.o(.text)
        */libgcc.a:div.o(.text)

        /* putchar.o(.text) */

        /* functions with attribute((section(".fastcode"))) */
        *(.fastcode*)

        . = ALIGN(4);
        _edata = .;      /* end of data in RAM; used by the startup code to initialise .data */
    } > RAM

    /* The (non fastcode) program code and other data goes into FLASH */
    .text : {
        . = ALIGN(4);
        start_spiflash1.o(.text)  /* c runtime initialization (code) */

        /*
         * I do not understand why, but if I do not put this section, I got
         * an overlapping sections error with some programs (for instance pi.c
         * or C++ programs)
         */
        *(.eh_frame)
        *(.eh_frame_hdr)
        *(.init_array*)
        *(.gcc_except_table*)

        *(.text*)                 /* .text* sections (code) */
        . = ALIGN(4);
        *(.rodata*)               /* .rodata* sections (constants, strings, etc.) */
        *(.srodata*)              /* .srodata* sections (small constants) */
        /*
         * The startup code copies the RAM section word by word (lw/sw)
         * starting from _sidata, so the end of the read-only data is
         * aligned on a word boundary before defining it.
         */
        . = ALIGN(4);
        _etext = .;               /* end of code and read-only data */
        _sidata = _etext;         /* load address of the RAM section; used by the startup code */
    } >FLASH

    /* Uninitialized data section, zeroed by the startup code */
    .bss : {
        . = ALIGN(4);
        _sbss = .;                /* bss start; used by the startup code */
        *(.bss*)
        *(.sbss*)
        *(COMMON)
        . = ALIGN(4);
        _ebss = .;                /* bss end; used by the startup code */
    } >RAM

    /* this is to define the start of the heap */
    .heap : {
        . = ALIGN(4);
        _heap_start = .;          /* define a global symbol at heap start */
        _end = .;                 /* as expected by syscalls.c */
    } >RAM

}
|
9
FIRMWARE/start.S
Normal file
9
FIRMWARE/start.S
Normal file
@@ -0,0 +1,9 @@
|
||||
/*
 * Startup code (RISC-V, GNU as).
 * Sets gp to the base address of the IO page and sp to the initial
 * top of stack, calls main, then executes ebreak when main returns.
 */
.equ IO_BASE,   0x400000        /* value loaded into gp */
.equ STACK_TOP, 0x1800          /* value loaded into sp */

.section .text
.globl start
start:
        li      gp, IO_BASE
        li      sp, STACK_TOP
        call    main
        ebreak                  /* reached when main returns */
|
||||
|
9
FIRMWARE/start_pipeline.S
Normal file
9
FIRMWARE/start_pipeline.S
Normal file
@@ -0,0 +1,9 @@
|
||||
/*
 * Startup code (RISC-V, GNU as), variant with a higher initial stack.
 * Sets gp to the base address of the IO page and sp to the initial
 * top of stack, calls main, then executes ebreak when main returns.
 */
.equ IO_BASE,   0x400000        /* value loaded into gp */
.equ STACK_TOP, 0x20000         /* value loaded into sp */

.section .text
.globl start
start:
        li      gp, IO_BASE
        li      sp, STACK_TOP
        call    main
        ebreak                  /* reached when main returns */
|
||||
|
43
FIRMWARE/start_spiflash1.S
Normal file
43
FIRMWARE/start_spiflash1.S
Normal file
@@ -0,0 +1,43 @@
|
||||
# Startup code for programs stored in SPI flash (RISC-V, GNU as).
# Sets gp and sp, clears .bss, copies .data from flash to RAM,
# calls main, then executes ebreak when main returns.
# Registers used: a0, a1, a2, a3.

.equ IO_BASE,   0x400000        # value loaded into gp
.equ STACK_TOP, 0x1800          # value loaded into sp (RAM length in the spiflash linker scripts)

.text
.global _start
.type _start, @function

_start:
.option push
.option norelax
        li      gp, IO_BASE
.option pop

        li      sp, STACK_TOP

# zero-init bss section:
# clears from _sbss to _ebss
# _sbss and _ebss are defined by linker script (spiflash.ld)
# Addresses are compared as unsigned values (bgeu/bltu).
        la      a0, _sbss               # a0 = current word
        la      a1, _ebss               # a1 = end of bss
        bgeu    a0, a1, end_init_bss    # nothing to clear
loop_init_bss:
        sw      zero, 0(a0)
        addi    a0, a0, 4
        bltu    a0, a1, loop_init_bss
end_init_bss:

# copy data section from SPI Flash to BRAM:
# copies from _sidata (in flash) to _sdata ... _edata (in BRAM)
# _sidata, _sdata and _edata are defined by linker script (spiflash.ld)
# Addresses are compared as unsigned values (bgeu/bltu).
        la      a0, _sidata             # a0 = source (flash)
        la      a1, _sdata              # a1 = destination (RAM)
        la      a2, _edata              # a2 = end of destination
        bgeu    a1, a2, end_init_data   # nothing to copy
loop_init_data:
        lw      a3, 0(a0)
        sw      a3, 0(a1)
        addi    a0, a0, 4
        addi    a1, a1, 4
        bltu    a1, a2, loop_init_data
end_init_data:

        call    main
        ebreak                          # reached when main returns
|
13
FIRMWARE/test_rdcycle.c
Normal file
13
FIRMWARE/test_rdcycle.c
Normal file
@@ -0,0 +1,13 @@
|
||||
#include "perf.h"
|
||||
|
||||
// Prints the cycles and retired-instructions counters 100 times, then
// once more together with 100*CPI (cycles per instruction, times 100).
// Counters are 64 bits wide but printed truncated to int.
int main() {
    for(int i=0; i<100; ++i) {
        uint64_t cycles = rdcycle();
        uint64_t instret = rdinstret();
        printf("i=%d cycles=%d instret=%d\n", i, (int)cycles, (int)instret);
    }
    uint64_t instret = rdinstret();
    uint64_t cycles = rdcycle();
    // An instret counter reading zero would make the division below
    // divide by zero: report 0 in that case.
    uint64_t cpi100 = (instret != 0) ? 100*cycles/instret : 0;
    printf("cycles=%d instret=%d 100CPI=%d\n", (int)cycles, (int)instret, (int)cpi100);

    return 0;
}
|
17
FIRMWARE/test_spi_flash.c
Normal file
17
FIRMWARE/test_spi_flash.c
Normal file
@@ -0,0 +1,17 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Base address used to read the SPI flash as an array of 32-bit words.
// volatile: every access below must really be performed, the compiler
// may not cache or drop the reads.
// NOTE(review): presumably the flash is memory-mapped at 1 << 23 - confirm
// against the SOC address decoding.
#define SPI_FLASH_BASE ((volatile uint32_t*)(1 << 23))

// Endlessly dumps the first 40 words read from SPI_FLASH_BASE: for each word,
// prints its index, its value in hexadecimal and its four bytes as characters
// (in memory order), then prints two empty lines before starting over.
int main() {
  for(;;) {
    for(int i=0; i<40; ++i) {
      uint32_t word = SPI_FLASH_BASE[i];
      char* c = (char*)&word;
      // %x expects an unsigned int; uint32_t may be a different type
      // (e.g. unsigned long), so convert explicitly.
      printf("%d 0x%x %c%c%c%c\n", i, (unsigned int)word, c[0],c[1],c[2],c[3]);
    }
    printf("\n");
    printf("\n");
  }

}
|
444
FIRMWARE/tinyraytracer.c
Normal file
444
FIRMWARE/tinyraytracer.c
Normal file
@@ -0,0 +1,444 @@
|
||||
/* A port of Dmitry Sokolov's tiny raytracer to C and to FemtoRV32 */
|
||||
/* Displays on the small OLED display and/or HDMI */
|
||||
/* Bruno Levy, 2020 */
|
||||
/* Original tinyraytracer: https://github.com/ssloy/tinyraytracer */
|
||||
|
||||
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
// Returns x when x > y, and y in every other case
// (so y is also the result when the comparison is false because of a NaN).
static inline float max(float x, float y) {
  if (x > y) {
    return x;
  }
  return y;
}
|
||||
// Returns x when x < y, and y in every other case
// (so y is also the result when the comparison is false because of a NaN).
static inline float min(float x, float y) {
  if (x < y) {
    return x;
  }
  return y;
}
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
// If you want to adapt tinyraytracer to your own platform, there are
|
||||
// mostly two macros and two functions to write:
|
||||
// graphics_width
|
||||
// graphics_height
|
||||
// graphics_init()
|
||||
// graphics_set_pixel()
|
||||
//
|
||||
// You can also write the following functions (or leave them empty if
|
||||
// you do not need them):
|
||||
// graphics_terminate()
|
||||
// stats_begin_frame()
|
||||
// stats_begin_pixel()
|
||||
// stats_end_pixel()
|
||||
// stats_end_frame()
|
||||
|
||||
|
||||
// Size of the screen
|
||||
// Replace with your own variables or values
|
||||
#define graphics_width 120
|
||||
#define graphics_height 60
|
||||
|
||||
// Two pixels per character using UTF8 character set
|
||||
// (comment-out if terminal does not support it)
|
||||
#define graphics_double_lines
|
||||
|
||||
// Replace with your own stuff to initialize graphics
|
||||
static inline void graphics_init() {
|
||||
printf("\033[48;5;16m" // set background color black
|
||||
"\033[H" // home
|
||||
"\033[2J"); // clear screen
|
||||
}
|
||||
|
||||
// Replace with your own stuff to terminate graphics or leave empty
|
||||
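// NOTE(review): this comment used to say that <ctrl><D> is sent to the UART
// here, to exit the simulation in Verilator (it is captured by special code
// in RTL/DEVICES/uart.v). The function body below is empty and sends
// nothing; confirm whether that behavior should be restored.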
|
||||
static inline void graphics_terminate() {
|
||||
}
|
||||
|
||||
|
||||
// Replace with your own code.
|
||||
void graphics_set_pixel(int x, int y, float r, float g, float b) {
|
||||
r = max(0.0f, min(1.0f, r));
|
||||
g = max(0.0f, min(1.0f, g));
|
||||
b = max(0.0f, min(1.0f, b));
|
||||
uint8_t R = (uint8_t)(255.0f * r);
|
||||
uint8_t G = (uint8_t)(255.0f * g);
|
||||
uint8_t B = (uint8_t)(255.0f * b);
|
||||
#ifdef graphics_double_lines
|
||||
static uint8_t prev_R=0;
|
||||
static uint8_t prev_G=0;
|
||||
static uint8_t prev_B=0;
|
||||
if(y&1) {
|
||||
if((R == prev_R) && (G == prev_G) && (B == prev_B)) {
|
||||
printf("\033[48;2;%d;%d;%dm ",(int)R,(int)G,(int)B);
|
||||
} else {
|
||||
printf("\033[48;2;%d;%d;%dm",(int)prev_R,(int)prev_G,(int)prev_B);
|
||||
printf("\033[38;2;%d;%d;%dm",(int)R,(int)G,(int)B);
|
||||
// https://www.w3.org/TR/xml-entity-names/025.html
|
||||
// https://onlineunicodetools.com/convert-unicode-to-utf8
|
||||
printf("\xE2\x96\x83");
|
||||
}
|
||||
if(x == graphics_width-1) {
|
||||
printf("\033[38;2;0;0;0m");
|
||||
printf("\033[48;2;0;0;0m\n");
|
||||
}
|
||||
} else {
|
||||
prev_R = R;
|
||||
prev_G = G;
|
||||
prev_B = B;
|
||||
}
|
||||
#else
|
||||
printf("\033[48;2;%d;%d;%dm ",(int)R,(int)G,(int)B);
|
||||
if(x == graphics_width-1) {
|
||||
printf("\033[48;2;0;0;0m\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Begins statistics collection for current frame.
// Leave empty if not needed.
// (Explicit void return type: a declaration without a type relies on
// "implicit int", which is no longer valid since C99.)
static inline void stats_begin_frame(void) {
}
|
||||
|
||||
// Begins statistics collection for current pixel
// Leave empty if not needed.
// There are these two levels because on some
// femtorv32 cores (quark, tachyon), the clock tick counter does not
// have sufficient bits and will wrap during the time taken by
// rendering a frame (up to several minutes).
// (Explicit void return type: a declaration without a type relies on
// "implicit int", which is no longer valid since C99.)
static inline void stats_begin_pixel(void) {
}
|
||||
|
||||
// Ends statistics collection for current pixel
// Leave empty if not needed.
// (Explicit void return type: a declaration without a type relies on
// "implicit int", which is no longer valid since C99.)
static inline void stats_end_pixel(void) {
}
|
||||
|
||||
// Ends statistics collection for current frame
// and displays result.
// Leave empty if not needed.
// (Explicit void return type: a declaration without a type relies on
// "implicit int", which is no longer valid since C99.)
static inline void stats_end_frame(void) {
}
|
||||
|
||||
// Normally you will not need to modify anything beyond that point.
|
||||
/*******************************************************************/
|
||||
|
||||
typedef struct { float x,y,z; } vec3;
|
||||
typedef struct { float x,y,z,w; } vec4;
|
||||
|
||||
static inline vec3 make_vec3(float x, float y, float z) {
|
||||
vec3 V;
|
||||
V.x = x; V.y = y; V.z = z;
|
||||
return V;
|
||||
}
|
||||
|
||||
static inline vec4 make_vec4(float x, float y, float z, float w) {
|
||||
vec4 V;
|
||||
V.x = x; V.y = y; V.z = z; V.w = w;
|
||||
return V;
|
||||
}
|
||||
|
||||
static inline vec3 vec3_neg(vec3 V) {
|
||||
return make_vec3(-V.x, -V.y, -V.z);
|
||||
}
|
||||
|
||||
static inline vec3 vec3_add(vec3 U, vec3 V) {
|
||||
return make_vec3(U.x+V.x, U.y+V.y, U.z+V.z);
|
||||
}
|
||||
|
||||
static inline vec3 vec3_sub(vec3 U, vec3 V) {
|
||||
return make_vec3(U.x-V.x, U.y-V.y, U.z-V.z);
|
||||
}
|
||||
|
||||
static inline float vec3_dot(vec3 U, vec3 V) {
|
||||
return U.x*V.x+U.y*V.y+U.z*V.z;
|
||||
}
|
||||
|
||||
static inline vec3 vec3_scale(float s, vec3 U) {
|
||||
return make_vec3(s*U.x, s*U.y, s*U.z);
|
||||
}
|
||||
|
||||
static inline float vec3_length(vec3 U) {
|
||||
return sqrtf(U.x*U.x+U.y*U.y+U.z*U.z);
|
||||
}
|
||||
|
||||
static inline vec3 vec3_normalize(vec3 U) {
|
||||
return vec3_scale(1.0f/vec3_length(U),U);
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
typedef struct Light {
|
||||
vec3 position;
|
||||
float intensity;
|
||||
} Light;
|
||||
|
||||
Light make_Light(vec3 position, float intensity) {
|
||||
Light L;
|
||||
L.position = position;
|
||||
L.intensity = intensity;
|
||||
return L;
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
typedef struct {
|
||||
float refractive_index;
|
||||
vec4 albedo;
|
||||
vec3 diffuse_color;
|
||||
float specular_exponent;
|
||||
} Material;
|
||||
|
||||
Material make_Material(float r, vec4 a, vec3 color, float spec) {
|
||||
Material M;
|
||||
M.refractive_index = r;
|
||||
M.albedo = a;
|
||||
M.diffuse_color = color;
|
||||
M.specular_exponent = spec;
|
||||
return M;
|
||||
}
|
||||
|
||||
Material make_Material_default() {
|
||||
Material M;
|
||||
M.refractive_index = 1;
|
||||
M.albedo = make_vec4(1,0,0,0);
|
||||
M.diffuse_color = make_vec3(0,0,0);
|
||||
M.specular_exponent = 0;
|
||||
return M;
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
typedef struct {
|
||||
vec3 center;
|
||||
float radius;
|
||||
Material material;
|
||||
} Sphere;
|
||||
|
||||
Sphere make_Sphere(vec3 c, float r, Material M) {
|
||||
Sphere S;
|
||||
S.center = c;
|
||||
S.radius = r;
|
||||
S.material = M;
|
||||
return S;
|
||||
}
|
||||
|
||||
BOOL Sphere_ray_intersect(Sphere* S, vec3 orig, vec3 dir, float* t0) {
|
||||
vec3 L = vec3_sub(S->center, orig);
|
||||
float tca = vec3_dot(L,dir);
|
||||
float d2 = vec3_dot(L,L) - tca*tca;
|
||||
float r2 = S->radius*S->radius;
|
||||
if (d2 > r2) return 0;
|
||||
float thc = sqrtf(r2 - d2);
|
||||
*t0 = tca - thc;
|
||||
float t1 = tca + thc;
|
||||
if (*t0 < 0) *t0 = t1;
|
||||
if (*t0 < 0) return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
vec3 reflect(vec3 I, vec3 N) {
|
||||
return vec3_sub(I, vec3_scale(2.f*vec3_dot(I,N),N));
|
||||
}
|
||||
|
||||
vec3 refract(vec3 I, vec3 N, float eta_t, float eta_i /* =1.f */) {
|
||||
// Snell's law
|
||||
float cosi = -max(-1.f, min(1.f, vec3_dot(I,N)));
|
||||
// if the ray comes from the inside the object, swap the air and the media
|
||||
if (cosi<0) return refract(I, vec3_neg(N), eta_i, eta_t);
|
||||
float eta = eta_i / eta_t;
|
||||
float k = 1 - eta*eta*(1 - cosi*cosi);
|
||||
// k<0 = total reflection, no ray to refract.
|
||||
// I refract it anyways, this has no physical meaning
|
||||
return k<0 ? make_vec3(1,0,0)
|
||||
: vec3_add(vec3_scale(eta,I),vec3_scale((eta*cosi - sqrtf(k)),N));
|
||||
}
|
||||
|
||||
BOOL scene_intersect(
|
||||
vec3 orig, vec3 dir, Sphere* spheres, int nb_spheres,
|
||||
vec3* hit, vec3* N, Material* material
|
||||
) {
|
||||
float spheres_dist = 1e30;
|
||||
for(int i=0; i<nb_spheres; ++i) {
|
||||
float dist_i;
|
||||
if(
|
||||
Sphere_ray_intersect(&spheres[i], orig, dir, &dist_i) &&
|
||||
(dist_i < spheres_dist)
|
||||
) {
|
||||
spheres_dist = dist_i;
|
||||
*hit = vec3_add(orig,vec3_scale(dist_i,dir));
|
||||
*N = vec3_normalize(vec3_sub(*hit, spheres[i].center));
|
||||
*material = spheres[i].material;
|
||||
}
|
||||
}
|
||||
float checkerboard_dist = 1e30;
|
||||
if (fabs(dir.y)>1e-3) {
|
||||
float d = -(orig.y+4)/dir.y; // the checkerboard plane has equation y = -4
|
||||
vec3 pt = vec3_add(orig, vec3_scale(d,dir));
|
||||
if (d>0 && fabs(pt.x)<10 && pt.z<-10 && pt.z>-30 && d<spheres_dist) {
|
||||
checkerboard_dist = d;
|
||||
*hit = pt;
|
||||
*N = make_vec3(0,1,0);
|
||||
material->diffuse_color =
|
||||
(((int)(.5*hit->x+1000) + (int)(.5*hit->z)) & 1)
|
||||
? make_vec3(.3, .3, .3)
|
||||
: make_vec3(.3, .2, .1);
|
||||
}
|
||||
}
|
||||
return min(spheres_dist, checkerboard_dist)<1000;
|
||||
}
|
||||
|
||||
vec3 cast_ray(
|
||||
vec3 orig, vec3 dir, Sphere* spheres, int nb_spheres,
|
||||
Light* lights, int nb_lights, int depth /* =0 */
|
||||
) {
|
||||
vec3 point,N;
|
||||
Material material = make_Material_default();
|
||||
if (
|
||||
depth>2 ||
|
||||
!scene_intersect(orig, dir, spheres, nb_spheres, &point, &N, &material)
|
||||
) {
|
||||
float s = 0.5*(dir.y + 1.0);
|
||||
return vec3_add(
|
||||
vec3_scale(s,make_vec3(0.2, 0.7, 0.8)),
|
||||
vec3_scale(s,make_vec3(0.0, 0.0, 0.5))
|
||||
);
|
||||
}
|
||||
|
||||
vec3 reflect_dir=vec3_normalize(reflect(dir, N));
|
||||
vec3 refract_dir=vec3_normalize(refract(dir,N,material.refractive_index,1));
|
||||
|
||||
// offset the original point to avoid occlusion by the object itself
|
||||
vec3 reflect_orig =
|
||||
vec3_dot(reflect_dir,N) < 0
|
||||
? vec3_sub(point,vec3_scale(1e-3,N))
|
||||
: vec3_add(point,vec3_scale(1e-3,N));
|
||||
vec3 refract_orig =
|
||||
vec3_dot(refract_dir,N) < 0
|
||||
? vec3_sub(point,vec3_scale(1e-3,N))
|
||||
: vec3_add(point,vec3_scale(1e-3,N));
|
||||
vec3 reflect_color = cast_ray(
|
||||
reflect_orig, reflect_dir, spheres, nb_spheres,
|
||||
lights, nb_lights, depth + 1
|
||||
);
|
||||
vec3 refract_color = cast_ray(
|
||||
refract_orig, refract_dir, spheres, nb_spheres,
|
||||
lights, nb_lights, depth + 1
|
||||
);
|
||||
|
||||
float diffuse_light_intensity = 0, specular_light_intensity = 0;
|
||||
for (int i=0; i<nb_lights; i++) {
|
||||
vec3 light_dir = vec3_normalize(vec3_sub(lights[i].position,point));
|
||||
float light_distance = vec3_length(vec3_sub(lights[i].position,point));
|
||||
|
||||
vec3 shadow_orig =
|
||||
vec3_dot(light_dir,N) < 0
|
||||
? vec3_sub(point,vec3_scale(1e-3,N))
|
||||
: vec3_add(point,vec3_scale(1e-3,N)) ;
|
||||
// checking if the point lies in the shadow of the lights[i]
|
||||
vec3 shadow_pt, shadow_N;
|
||||
Material tmpmaterial;
|
||||
if (
|
||||
scene_intersect(
|
||||
shadow_orig, light_dir, spheres, nb_spheres,
|
||||
&shadow_pt, &shadow_N, &tmpmaterial
|
||||
) && (
|
||||
vec3_length(vec3_sub(shadow_pt,shadow_orig)) < light_distance
|
||||
)
|
||||
) continue ;
|
||||
|
||||
diffuse_light_intensity +=
|
||||
lights[i].intensity * max(0.f, vec3_dot(light_dir,N));
|
||||
|
||||
float abc = max(
|
||||
0.f, vec3_dot(vec3_neg(reflect(vec3_neg(light_dir), N)),dir)
|
||||
);
|
||||
float def = material.specular_exponent;
|
||||
if(abc > 0.0f && def > 0.0f) {
|
||||
specular_light_intensity += powf(abc,def)*lights[i].intensity;
|
||||
}
|
||||
}
|
||||
vec3 result = vec3_scale(
|
||||
diffuse_light_intensity * material.albedo.x, material.diffuse_color
|
||||
);
|
||||
result = vec3_add(
|
||||
result, vec3_scale(specular_light_intensity * material.albedo.y,
|
||||
make_vec3(1,1,1))
|
||||
);
|
||||
result = vec3_add(result, vec3_scale(material.albedo.z, reflect_color));
|
||||
result = vec3_add(result, vec3_scale(material.albedo.w, refract_color));
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline void render_pixel(
|
||||
int i, int j, Sphere* spheres, int nb_spheres, Light* lights, int nb_lights
|
||||
) {
|
||||
const float fov = M_PI/3.;
|
||||
stats_begin_pixel();
|
||||
float dir_x = (i + 0.5) - graphics_width/2.;
|
||||
float dir_y = -(j + 0.5) + graphics_height/2.; // this flips the image.
|
||||
float dir_z = -graphics_height/(2.*tan(fov/2.));
|
||||
vec3 C = cast_ray(
|
||||
make_vec3(0,0,0), vec3_normalize(make_vec3(dir_x, dir_y, dir_z)),
|
||||
spheres, nb_spheres, lights, nb_lights, 0
|
||||
);
|
||||
graphics_set_pixel(i,j,C.x,C.y,C.z);
|
||||
stats_end_pixel();
|
||||
}
|
||||
|
||||
void render(Sphere* spheres, int nb_spheres, Light* lights, int nb_lights) {
|
||||
stats_begin_frame();
|
||||
graphics_init();
|
||||
#ifdef graphics_double_lines
|
||||
for (int j = 0; j<graphics_height; j+=2) {
|
||||
for (int i = 0; i<graphics_width; i++) {
|
||||
render_pixel(i,j ,spheres,nb_spheres,lights,nb_lights);
|
||||
render_pixel(i,j+1,spheres,nb_spheres,lights,nb_lights);
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (int j = 0; j<graphics_height; j++) {
|
||||
for (int i = 0; i<graphics_width; i++) {
|
||||
render_pixel(i,j ,spheres,nb_spheres,lights,nb_lights);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
graphics_terminate();
|
||||
stats_end_frame();
|
||||
}
|
||||
|
||||
int nb_spheres = 4;
|
||||
Sphere spheres[4];
|
||||
|
||||
int nb_lights = 3;
|
||||
Light lights[3];
|
||||
|
||||
void init_scene() {
|
||||
Material ivory = make_Material(
|
||||
1.0, make_vec4(0.6, 0.3, 0.1, 0.0), make_vec3(0.4, 0.4, 0.3), 50.
|
||||
);
|
||||
Material glass = make_Material(
|
||||
1.5, make_vec4(0.0, 0.5, 0.1, 0.8), make_vec3(0.6, 0.7, 0.8), 125.
|
||||
);
|
||||
Material red_rubber = make_Material(
|
||||
1.0, make_vec4(0.9, 0.1, 0.0, 0.0), make_vec3(0.3, 0.1, 0.1), 10.
|
||||
);
|
||||
Material mirror = make_Material(
|
||||
1.0, make_vec4(0.0, 10.0, 0.8, 0.0), make_vec3(1.0, 1.0, 1.0), 142.
|
||||
);
|
||||
|
||||
spheres[0] = make_Sphere(make_vec3(-3, 0, -16), 2, ivory);
|
||||
spheres[1] = make_Sphere(make_vec3(-1.0, -1.5, -12), 2, glass);
|
||||
spheres[2] = make_Sphere(make_vec3( 1.5, -0.5, -18), 3, red_rubber);
|
||||
spheres[3] = make_Sphere(make_vec3( 7, 5, -18), 4, mirror);
|
||||
|
||||
lights[0] = make_Light(make_vec3(-20, 20, 20), 1.5);
|
||||
lights[1] = make_Light(make_vec3( 30, 50, -25), 1.8);
|
||||
lights[2] = make_Light(make_vec3( 30, 20, 30), 1.7);
|
||||
}
|
||||
|
||||
int main() {
|
||||
init_scene();
|
||||
render(spheres, nb_spheres, lights, nb_lights);
|
||||
return 0;
|
||||
}
|
173
FIRMWARE/tty_graphics.h
Normal file
173
FIRMWARE/tty_graphics.h
Normal file
@@ -0,0 +1,173 @@
|
||||
#ifndef TTY_GRAPHICS_H
|
||||
#define TTY_GRAPHICS_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/**
|
||||
* \brief Resets default tty colors (white foreground, black background)
|
||||
* \details It is useful to call this function once all graphics are finished,
|
||||
* else text output might be invisible or difficult to see depending on
|
||||
* current foreground and background colors.
|
||||
*/
|
||||
static inline void tty_graphics_reset_colors() {
|
||||
printf("\033[48;5;16m" // set background color black
|
||||
"\033[38;5;15m" // set foreground color white
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Moves the cursor position to the origin (top left).
|
||||
*/
|
||||
static inline void tty_graphics_home() {
|
||||
printf("\033[H");
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Clears the terminal.
|
||||
*/
|
||||
static inline void tty_graphics_clear() {
|
||||
printf("\033[2J");
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Initializes "graphics mode".
|
||||
* \details resets default colors, clears the terminal and moves the
|
||||
* cursor to the top-left position.
|
||||
*/
|
||||
static inline void tty_graphics_init() {
|
||||
tty_graphics_reset_colors();
|
||||
tty_graphics_home();
|
||||
tty_graphics_clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Terminates "graphics mode".
|
||||
* \details Restores default foreground and background colors.
|
||||
*/
|
||||
static inline void tty_graphics_terminate() {
|
||||
tty_graphics_reset_colors();
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Moves the cursor to a specific location.
|
||||
*/
|
||||
static inline void tty_graphics_gotoXY(int x, int y) {
|
||||
printf("\033[%d;%dH",y,x);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Draws a "pixel" (a block) at the current
|
||||
* cursor position and advances the current cursor
|
||||
* position.
|
||||
*/
|
||||
static inline void tty_graphics_draw_one_pixel(
|
||||
uint8_t r, uint8_t g, uint8_t b
|
||||
) {
|
||||
printf("\033[48;2;%d;%d;%dm ",(int)r,(int)g,(int)b);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Draws two "pixels" at the current
|
||||
* cursor position and advances the current cursor
|
||||
* position.
|
||||
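* \details Characters are roughly twice as high as wide.
* To generate square pixels, this function draws two pixels in
* the same character cell, using a lower-block character: the
* background color shows the first pixel (r1,g1,b1) and the
* foreground color shows the second pixel (r2,g2,b2).
* NOTE(review): the byte sequence emitted below, E2 96 83, encodes
* U+2583 (lower three eighths block); the lower-half block is
* U+2584 (E2 96 84). Confirm which glyph is intended.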
|
||||
*/
|
||||
static inline void tty_graphics_draw_two_pixels(
|
||||
uint8_t r1, uint8_t g1, uint8_t b1,
|
||||
uint8_t r2, uint8_t g2, uint8_t b2
|
||||
) {
|
||||
if((r2 == r1) && (g2 == g1) && (b2 == b1)) {
|
||||
tty_graphics_draw_one_pixel(r1,g1,b1);
|
||||
} else {
|
||||
printf("\033[48;2;%d;%d;%dm",(int)r1,(int)g1,(int)b1);
|
||||
printf("\033[38;2;%d;%d;%dm",(int)r2,(int)g2,(int)b2);
|
||||
// https://www.w3.org/TR/xml-entity-names/025.html
|
||||
// https://onlineunicodetools.com/convert-unicode-to-utf8
|
||||
// https://copypastecharacter.com/
|
||||
printf("\xE2\x96\x83");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Moves the cursor position to the next line.
|
||||
* \details Background and foreground colors are set to black.
|
||||
*/
|
||||
static inline void tty_graphics_newline() {
|
||||
printf("\033[38;2;0;0;0m");
|
||||
printf("\033[48;2;0;0;0m\n");
|
||||
}
|
||||
|
||||
typedef void (*tty_graphics_pixelfunc)(int x, int y, uint8_t* r, uint8_t* g, uint8_t* b);
|
||||
typedef void (*tty_graphics_fpixelfunc)(int x, int y, float* r, float* g, float* b);
|
||||
|
||||
/**
|
||||
* \brief Draws an image by calling a user-specified function for each pixel.
|
||||
* \param[in] width , height dimension of the image in square pixels
|
||||
* \param[in] do_pixel the user function to be called for each pixel (a "shader"), that
|
||||
* determines the (integer) components r,g,b of the pixel's color.
|
||||
* \details Uses half-charater pixels.
|
||||
*/
|
||||
static inline void tty_graphics_scan(int width, int height, tty_graphics_pixelfunc do_pixel) {
|
||||
uint8_t r1, g1, b1;
|
||||
uint8_t r2, g2, b2;
|
||||
tty_graphics_home();
|
||||
for (int j = 0; j<height; j+=2) {
|
||||
for (int i = 0; i<width; i++) {
|
||||
do_pixel(i,j , &r1, &g1, &b1);
|
||||
do_pixel(i,j+1, &r2, &g2, &b2);
|
||||
tty_graphics_draw_two_pixels(r1,g1,b1,r2,g2,b2);
|
||||
if(i == width-1) {
|
||||
tty_graphics_newline();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Converts a floating point value to a byte.
 * \param[in] f the floating point value in [0,1]
 * \return the byte, in [0,255]
 * \details the input value is clamped to [0,1]. NaN is mapped to 0.
 */
static inline uint8_t tty_graphics_ftoi(float f) {
  // Written as !(f > 0) rather than (f <= 0): every comparison with NaN is
  // false, so NaN also takes this branch instead of reaching the
  // float-to-integer conversion below, which is undefined for NaN.
  if (!(f > 0.0f)) {
    return 0;
  }
  if (f > 1.0f) {
    f = 1.0f;
  }
  return (uint8_t)(255.0f * f);
}
|
||||
|
||||
/**
|
||||
* \brief Draws an image by calling a user-specified function for each pixel.
|
||||
* \param[in] width , height dimension of the image in square pixels
|
||||
* \param[in] do_pixel the user function to be called for each pixel (a "shader"), that
|
||||
* determines the (floating-point) components fr,fg,fb of the pixel's color.
|
||||
* \details Uses half-charater pixels.
|
||||
*/
|
||||
static inline void tty_graphics_fscan(int width, int height, tty_graphics_fpixelfunc do_pixel) {
|
||||
float fr1, fg1, fb1;
|
||||
float fr2, fg2, fb2;
|
||||
uint8_t r1, g1, b1;
|
||||
uint8_t r2, g2, b2;
|
||||
tty_graphics_home();
|
||||
for (int j = 0; j<height; j+=2) {
|
||||
for (int i = 0; i<width; i++) {
|
||||
do_pixel(i,j , &fr1, &fg1, &fb1);
|
||||
r1 = tty_graphics_ftoi(fr1);
|
||||
g1 = tty_graphics_ftoi(fg1);
|
||||
b1 = tty_graphics_ftoi(fb1);
|
||||
do_pixel(i,j+1, &fr2, &fg2, &fb2);
|
||||
r2 = tty_graphics_ftoi(fr2);
|
||||
g2 = tty_graphics_ftoi(fg2);
|
||||
b2 = tty_graphics_ftoi(fb2);
|
||||
tty_graphics_draw_two_pixels(r1,g1,b1,r2,g2,b2);
|
||||
if(i == width-1) {
|
||||
tty_graphics_newline();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
38
FIRMWARE/tty_graphics_demo.c
Normal file
38
FIRMWARE/tty_graphics_demo.c
Normal file
@@ -0,0 +1,38 @@
|
||||
#include "tty_graphics.h"
|
||||
#include <math.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// Size of the screen
|
||||
// Replace with your own variables or values
|
||||
#define graphics_width 80
|
||||
#define graphics_height 40
|
||||
|
||||
int frame = 0;
|
||||
float f = 0.0;
|
||||
|
||||
void do_pixel(int i, int j, float* R, float* G, float* B) {
|
||||
float x = (float)i;
|
||||
float y = (float)j;
|
||||
*R = 0.5f*(sin(x*0.1+f)+1.0);
|
||||
*G = 0.5f*(sin(y*0.1+2.0*f)+1.0);
|
||||
*B = 0.5f*(sin((x+y)*0.05-3.0*f)+1.0);
|
||||
}
|
||||
|
||||
int main() {
|
||||
tty_graphics_init();
|
||||
for(;;) {
|
||||
tty_graphics_fscan(graphics_width, graphics_height, do_pixel);
|
||||
f += 0.1;
|
||||
++frame;
|
||||
tty_graphics_reset_colors();
|
||||
printf("frame = %d\n",frame);
|
||||
#ifdef __linux__
|
||||
usleep(40000);
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
11
FIRMWARE/wait.S
Normal file
11
FIRMWARE/wait.S
Normal file
@@ -0,0 +1,11 @@
|
||||
# wait: busy-wait delay loop.
# Loads t0 with 1 << 17, then decrements it until it reaches zero
# and returns. Takes no argument; only t0 is modified.
    .section .text
    .globl wait

wait:
    li      t0, 1
    slli    t0, t0, 17              # t0 = 1 << 17 = number of iterations
.Lwait_spin:
    addi    t0, t0, -1
    bnez    t0, .Lwait_spin         # loop until the counter reaches zero
    ret
|
||||
|
3694
LESSON1.md
Normal file
3694
LESSON1.md
Normal file
File diff suppressed because it is too large
Load Diff
8
README.md
Normal file
8
README.md
Normal file
@@ -0,0 +1,8 @@
|
||||
## Toolchain
|
||||
- Yosys / Yosys NextPNR / Yosys Apicula
|
||||
|
||||
## ToDo
|
||||
|
||||
- Check documentation Yosys !
|
||||
- TOBB labs
|
||||
- Anki cards
|
24
step1.v
Normal file
24
step1.v
Normal file
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* Step 1: Blinker
|
||||
* DONE
|
||||
*/
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module SOC (
|
||||
input clk, // system clock
|
||||
input rst_i, // reset button
|
||||
output [3:0] led, // system LEDs
|
||||
input RXD, // UART receive
|
||||
output TXD // UART transmit
|
||||
);
|
||||
|
||||
|
||||
// A blinker that counts on 4 bits, wired to the 4 LEDs
|
||||
reg [3:0] count = 0;
|
||||
always @(posedge clk) begin
|
||||
count <= count + 1;
|
||||
end
|
||||
assign led = count;
|
||||
assign TXD = 1'b0; // not used for now
|
||||
endmodule
|
40
step2.v
Normal file
40
step2.v
Normal file
@@ -0,0 +1,40 @@
|
||||
/**
|
||||
* Step 2: Blinker (slower version)
|
||||
* DONE*
|
||||
*/
|
||||
|
||||
`default_nettype none
|
||||
`include "clockworks.v"
|
||||
|
||||
module SOC (
|
||||
input clk, // system clock
|
||||
input rst_i, // reset button
|
||||
output [4:0] led, // system LEDs
|
||||
input RXD, // UART receive
|
||||
output TXD // UART transmit
|
||||
);
|
||||
|
||||
wire clkI; // internal clock
|
||||
wire resetn; // internal reset signal, goes low on reset
|
||||
|
||||
// A blinker that counts on 5 bits, wired to the 5 LEDs
|
||||
reg [4:0] count = 0;
|
||||
always @(posedge clkI) begin
|
||||
count <= !resetn ? 0 : count + 1;
|
||||
end
|
||||
|
||||
// Clock gearbox (to let you see what happens)
|
||||
// and reset circuitry (to workaround an
|
||||
// initialization problem with Ice40)
|
||||
Clockworks #(
|
||||
.SLOW(21) // Divide clock frequency by 2^21
|
||||
)CW(
|
||||
.CLK(clk),
|
||||
.RESET(rst_i),
|
||||
.clk(clkI),
|
||||
.resetn(resetn)
|
||||
);
|
||||
|
||||
assign led = count;
|
||||
assign TXD = 1'b0; // not used for now
|
||||
endmodule
|
65
step3.v
Normal file
65
step3.v
Normal file
@@ -0,0 +1,65 @@
|
||||
/**
|
||||
* Step 3: Display a led pattern "animation" stored in BRAM.
|
||||
* DONE*
|
||||
*/
|
||||
|
||||
`default_nettype none
|
||||
`include "clockworks.v"
|
||||
|
||||
module SOC (
|
||||
input clk, // system clock
|
||||
input rst_i, // reset button
|
||||
output [4:0] led, // system LEDs
|
||||
input RXD, // UART receive
|
||||
output TXD // UART transmit
|
||||
);
|
||||
|
||||
wire clkI; // internal clock
|
||||
wire resetn; // internal reset signal, goes low on reset
|
||||
|
||||
reg [4:0] PC = 0;
|
||||
reg [4:0] MEM [0:20];
|
||||
initial begin
|
||||
MEM[0] = 5'b00000;
|
||||
MEM[1] = 5'b00001;
|
||||
MEM[2] = 5'b00010;
|
||||
MEM[3] = 5'b00100;
|
||||
MEM[4] = 5'b01000;
|
||||
MEM[5] = 5'b10000;
|
||||
MEM[6] = 5'b10001;
|
||||
MEM[7] = 5'b10010;
|
||||
MEM[8] = 5'b10100;
|
||||
MEM[9] = 5'b11000;
|
||||
MEM[10] = 5'b11001;
|
||||
MEM[11] = 5'b11010;
|
||||
MEM[12] = 5'b11100;
|
||||
MEM[13] = 5'b11101;
|
||||
MEM[14] = 5'b11110;
|
||||
MEM[15] = 5'b11111;
|
||||
MEM[16] = 5'b11110;
|
||||
MEM[17] = 5'b11100;
|
||||
MEM[18] = 5'b11000;
|
||||
MEM[19] = 5'b10000;
|
||||
MEM[20] = 5'b00000;
|
||||
end
|
||||
|
||||
reg [4:0] leds = 0;
|
||||
assign led=leds;
|
||||
|
||||
always @(posedge clkI) begin
|
||||
leds <= MEM[PC];
|
||||
PC <= (!resetn || PC==20) ? 0 : (PC+1);
|
||||
end
|
||||
|
||||
// Gearbox and reset circuitry.
|
||||
Clockworks #(
|
||||
.SLOW(25) // Divide clock frequency by 2^25
|
||||
)CW(
|
||||
.CLK(clk),
|
||||
.RESET(rst_i),
|
||||
.clk(clkI),
|
||||
.resetn(resetn)
|
||||
);
|
||||
|
||||
assign TXD = 1'b0; // not used for now
|
||||
endmodule
|
59
step3K.v
Normal file
59
step3K.v
Normal file
@@ -0,0 +1,59 @@
|
||||
`include "clockworks.v"
|
||||
|
||||
module SOC (
|
||||
input clk,
|
||||
input rst_i,
|
||||
output [4:0] led,
|
||||
output TXD,
|
||||
input RXD
|
||||
);
|
||||
|
||||
wire clkI, resetn;
|
||||
|
||||
reg [4:0] PC = 0;
|
||||
reg [4:0] MEM [0:20];
|
||||
|
||||
initial begin
|
||||
MEM[0] = 5'b00000;
|
||||
MEM[1] = 5'b00001;
|
||||
MEM[2] = 5'b00010;
|
||||
MEM[3] = 5'b00100;
|
||||
MEM[4] = 5'b01000;
|
||||
MEM[5] = 5'b10000;
|
||||
MEM[6] = 5'b10001;
|
||||
MEM[7] = 5'b10010;
|
||||
MEM[8] = 5'b10100;
|
||||
MEM[9] = 5'b11000;
|
||||
MEM[10] = 5'b11001;
|
||||
MEM[11] = 5'b11010;
|
||||
MEM[12] = 5'b11100;
|
||||
MEM[13] = 5'b11101;
|
||||
MEM[14] = 5'b11110;
|
||||
MEM[15] = 5'b11111;
|
||||
MEM[16] = 5'b11110;
|
||||
MEM[17] = 5'b11100;
|
||||
MEM[18] = 5'b11000;
|
||||
MEM[19] = 5'b10000;
|
||||
MEM[20] = 5'b00000;
|
||||
end
|
||||
|
||||
reg [4:0] leds = 0;
|
||||
assign led = leds;
|
||||
|
||||
always @(posedge clkI) begin
|
||||
leds <= MEM[PC];
|
||||
PC <= (!resetn || PC == 20) ? 0 : (PC + 1);
|
||||
end
|
||||
|
||||
Clockworks #(
|
||||
.SLOW(21)
|
||||
)clkw(
|
||||
.CLK(clk),
|
||||
.RESET(rst_i),
|
||||
.clk(clkI),
|
||||
.resetn(resetn)
|
||||
);
|
||||
|
||||
assign TXD = 1'b0;
|
||||
|
||||
endmodule
|
Reference in New Issue
Block a user