initial commit

This commit is contained in:
2025-08-02 06:09:31 +03:00
commit 00015ffc03
85 changed files with 62051 additions and 0 deletions

27
BOARDS/arty.xdc Normal file
View File

@@ -0,0 +1,27 @@
# Pin and timing constraints for the Arty build. BOARDS/run_arty.sh passes
# this file to nextpnr-xilinx (--xdc BOARDS/arty.xdc).
# Constrained top-level ports: CLK, LEDS[0..3], TXD, RXD, RESET.
# Every port below is given the LVCMOS33 I/O standard.
# Clock pin
set_property PACKAGE_PIN E3 [get_ports CLK]
set_property IOSTANDARD LVCMOS33 [get_ports CLK]
# LEDs (four bits, LEDS[0] first)
set_property PACKAGE_PIN H5 [get_ports LEDS[0]]
set_property PACKAGE_PIN J5 [get_ports LEDS[1]]
set_property PACKAGE_PIN T9 [get_ports LEDS[2]]
set_property PACKAGE_PIN T10 [get_ports LEDS[3]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[0]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[1]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[2]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[3]]
# Clock constraints
# Period 10.0 on CLK; read as nanoseconds this is 100 MHz, which agrees with
# BOARD_FREQ=100 in BOARDS/run_arty.sh.
create_clock -period 10.0 [get_ports CLK]
# UART
# Note: the UART and reset pins are placed with the LOC property, whereas the
# clock and LEDs above use PACKAGE_PIN.
set_property LOC D10 [get_ports TXD]
set_property LOC A9 [get_ports RXD]
set_property IOSTANDARD LVCMOS33 [get_ports RXD]
set_property IOSTANDARD LVCMOS33 [get_ports TXD]
# reset button
set_property LOC C2 [get_ports RESET]
set_property IOSTANDARD LVCMOS33 [get_ports RESET]

29
BOARDS/cmod_a7.xdc Normal file
View File

@@ -0,0 +1,29 @@
# Pin and timing constraints for the CMOD A7 build. BOARDS/run_cmod_a7.sh
# passes this file to nextpnr-xilinx (--xdc BOARDS/cmod_a7.xdc).
# Constrained top-level ports: CLK, LEDS[0..4], TXD, RXD, RESET.
# Every port below is given the LVCMOS33 I/O standard.
# Clock pin
set_property PACKAGE_PIN L17 [get_ports CLK]
set_property IOSTANDARD LVCMOS33 [get_ports CLK]
# LEDs (five bits, LEDS[0] first)
set_property PACKAGE_PIN A17 [get_ports LEDS[0]]
set_property PACKAGE_PIN C16 [get_ports LEDS[1]]
set_property PACKAGE_PIN B17 [get_ports LEDS[2]]
set_property PACKAGE_PIN B16 [get_ports LEDS[3]]
set_property PACKAGE_PIN C17 [get_ports LEDS[4]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[0]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[1]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[2]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[3]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[4]]
# Clock constraints
# Period 83.33 on CLK; read as nanoseconds this is about 12 MHz.
# NOTE(review): BOARDS/run_cmod_a7.sh defines BOARD_FREQ=100, which does not
# match this period - confirm which of the two values is correct.
create_clock -period 83.33 [get_ports CLK]
# UART
# Note: the UART and reset pins are placed with the LOC property, whereas the
# clock and LEDs above use PACKAGE_PIN.
set_property LOC G17 [get_ports TXD]
set_property LOC G19 [get_ports RXD]
set_property IOSTANDARD LVCMOS33 [get_ports RXD]
set_property IOSTANDARD LVCMOS33 [get_ports TXD]
# reset button
set_property LOC A18 [get_ports RESET]
set_property IOSTANDARD LVCMOS33 [get_ports RESET]

35
BOARDS/ecp5_evn.lpf Normal file
View File

@@ -0,0 +1,35 @@
# Pin constraints for the ECP5 EVN build. BOARDS/run_ecp5evn.sh passes this
# file to nextpnr-ecp5 (--lpf BOARDS/ecp5_evn.lpf).
# Constrained top-level ports: CLK, RESET, LEDS[0..4], TXD, RXD.
# NOTE(review): the reference link below points at the ULX3S constraint file
# and is identical to the header of BOARDS/ulx3s.lpf; it was presumably kept
# from that file - confirm the pin sites against the ECP5 EVN documentation.
# See https://github.com/emard/ulx3s/blob/master/doc/constraints/ulx3s_v20.lpf
## Clock #########################################
# 12 MHz input clock; agrees with BOARD_FREQ=12 in BOARDS/run_ecp5evn.sh.
LOCATE COMP "CLK" SITE "A10";
IOBUF PORT "CLK" IO_TYPE=LVCMOS33;
FREQUENCY PORT "CLK" 12 MHZ;
## RESET button ##################################
LOCATE COMP "RESET" SITE "P4";
IOBUF PORT "RESET" IO_TYPE=LVCMOS33;
## LEDs ##########################################
LOCATE COMP "LEDS[0]" SITE "B17";
LOCATE COMP "LEDS[1]" SITE "A17";
LOCATE COMP "LEDS[2]" SITE "C17";
LOCATE COMP "LEDS[3]" SITE "B18";
LOCATE COMP "LEDS[4]" SITE "A18";
IOBUF PORT "LEDS[0]" IO_TYPE=LVCMOS33;
IOBUF PORT "LEDS[1]" IO_TYPE=LVCMOS33;
IOBUF PORT "LEDS[2]" IO_TYPE=LVCMOS33;
IOBUF PORT "LEDS[3]" IO_TYPE=LVCMOS33;
IOBUF PORT "LEDS[4]" IO_TYPE=LVCMOS33;
## UART ######################################################
# Both UART pins enable the pull-up; TXD additionally sets DRIVE=4.
LOCATE COMP "TXD" SITE "D11";
LOCATE COMP "RXD" SITE "D12";
IOBUF PORT "TXD" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "RXD" PULLMODE=UP IO_TYPE=LVCMOS33;

13
BOARDS/icebreaker.pcf Normal file
View File

@@ -0,0 +1,13 @@
# Pin constraints for the iCEBreaker build. BOARDS/run_icebreaker.sh passes
# this file to nextpnr-ice40 and icetime (package sg48).
# Format: set_io <top-level port> <package pin>
# Clock input
set_io CLK 35
# LEDs (five bits, LEDS[0] first)
set_io LEDS[0] 27
set_io LEDS[1] 21
set_io LEDS[2] 25
set_io LEDS[3] 23
set_io LEDS[4] 26
# UART
set_io TXD 9
set_io RXD 6
# Reset input. run_icebreaker.sh builds with -DNEGATIVE_RESET, so this input
# is presumably active-low - confirm against the board schematic.
set_io RESET 10

21
BOARDS/icestick.pcf Normal file
View File

@@ -0,0 +1,21 @@
# Pin constraints for the IceStick build. BOARDS/run_icestick.sh and
# BOARDS/run_icestick_show.sh pass this file to nextpnr-ice40 (package tq144).
# Format: set_io <top-level port> <package pin>
# Clock input
set_io CLK 21
# LEDs (five bits, LEDS[0] first)
set_io LEDS[0] 99
set_io LEDS[1] 98
set_io LEDS[2] 97
set_io LEDS[3] 96
set_io LEDS[4] 95
# UART
set_io TXD 8
set_io RXD 9
# SPI flash. Pins 67 and 68 are each listed under two port names
# (SPIFLASH_MOSI / SPIFLASH_IO[0] and SPIFLASH_MISO / SPIFLASH_IO[1]);
# presumably a given design declares only one of the two namings.
set_io SPIFLASH_CLK 70
set_io SPIFLASH_CS_N 71
set_io SPIFLASH_MOSI 67
set_io SPIFLASH_MISO 68
set_io SPIFLASH_IO[0] 67
set_io SPIFLASH_IO[1] 68
# Reset input
set_io RESET 47

18
BOARDS/run_arty.sh Executable file
View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Build the SOC design for the Arty board (xc7a35tcsg324-1) and load it.
# Flow: yosys -> nextpnr-xilinx -> fasm2frames -> xc7frames2bit -> openFPGALoader.
# Usage: BOARDS/run_arty.sh "<verilog sources>"
# Paths such as BOARDS/arty.xdc are relative, so run it from the directory
# that contains BOARDS/.

design=SOC
xray_db=/usr/share/nextpnr/prjxray-db
chipdb_dir=/usr/share/nextpnr/xilinx-chipdb
fpga_part=xc7a35tcsg324-1
# Expanded unquoted below, so a space-separated list in $1 splits into files.
sources=$1
board_mhz=100
cpu_mhz=100

# From here on: echo every command and stop at the first failure.
set -ex

# Synthesis: produces ${design}.json
yosys -DARTY -DBOARD_FREQ=$board_mhz -DCPU_FREQ=$cpu_mhz \
    -p "scratchpad -set xilinx_dsp.multonly 1" \
    -p "synth_xilinx -nowidelut -flatten -abc9 -arch xc7 -top SOC; write_json $design.json" \
    $sources

# Place and route: produces ${design}_routed.json and ${design}.fasm
nextpnr-xilinx \
    --chipdb $chipdb_dir/xc7a35t.bin \
    --xdc BOARDS/arty.xdc \
    --json $design.json \
    --write ${design}_routed.json \
    --fasm $design.fasm

# Bitstream generation: fasm -> frames -> bit
fasm2frames --part $fpga_part --db-root $xray_db/artix7 $design.fasm > $design.frames
xc7frames2bit \
    --part_file $xray_db/artix7/$fpga_part/part.yaml \
    --part_name $fpga_part \
    --frm_file $design.frames \
    --output_file $design.bit

# Load into SRAM:
openFPGALoader --board arty $design.bit
# To write to FLASH instead:
#openFPGALoader --board arty -f $design.bit

18
BOARDS/run_cmod_a7.sh Executable file
View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Build the SOC design for the CMOD A7 (xc7a35tcpg236-1) and load it.
# Flow: yosys -> nextpnr-xilinx -> fasm2frames -> xc7frames2bit -> openFPGALoader.
# Usage: BOARDS/run_cmod_a7.sh "<verilog sources>"
# Paths such as BOARDS/cmod_a7.xdc are relative, so run it from the directory
# that contains BOARDS/.
PROJECT_NAME=SOC
DB_DIR=/usr/share/nextpnr/prjxray-db
CHIPDB_DIR=/usr/share/nextpnr/xilinx-chipdb
PART=xc7a35tcpg236-1
# Expanded unquoted below, so a space-separated list in $1 splits into files.
VERILOGS=$1
# NOTE(review): BOARDS/cmod_a7.xdc constrains CLK to an 83.33 ns period
# (about 12 MHz) while BOARD_FREQ is 100 here - confirm which value is right.
BOARD_FREQ=100
CPU_FREQ=100
# From here on: echo every command and stop at the first failure.
set -ex
yosys -DCMODA7 -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "scratchpad -set xilinx_dsp.multonly 1" -p "synth_xilinx -nowidelut -flatten -abc9 -arch xc7 -top SOC; write_json ${PROJECT_NAME}.json" ${VERILOGS}
nextpnr-xilinx --chipdb ${CHIPDB_DIR}/xc7a35tcpg236-1.bin --xdc BOARDS/cmod_a7.xdc --json ${PROJECT_NAME}.json --write ${PROJECT_NAME}_routed.json --fasm ${PROJECT_NAME}.fasm
fasm2frames --part ${PART} --db-root ${DB_DIR}/artix7 ${PROJECT_NAME}.fasm > ${PROJECT_NAME}.frames
xc7frames2bit --part_file ${DB_DIR}/artix7/${PART}/part.yaml --part_name ${PART} --frm_file ${PROJECT_NAME}.frames --output_file ${PROJECT_NAME}.bit
#To send to SRAM:
# Load the bitstream generated above. This used to name femtosoc.bit, a file
# this script never produces.
openFPGALoader --freq 30e6 -c digilent --fpga-part xc7a35 ${PROJECT_NAME}.bit
#To send to FLASH:
# openFPGALoader --freq 30e6 -c digilent --fpga-part xc7a35tcpg236 -f ${PROJECT_NAME}.bit

13
BOARDS/run_ecp5evn.sh Executable file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Build the SOC design for the ECP5 EVN board and program it.
# Flow: yosys (synth_ecp5) -> nextpnr-ecp5 -> ecppack -> ujprog.
# Usage: BOARDS/run_ecp5evn.sh "<verilog sources>"
# Run it from the directory that contains BOARDS/ (the .lpf path is relative).
# Each step is followed by "|| exit", so the script stops at the first failure
# and returns that step's exit status.
PROJECTNAME=SOC
BOARD=ecp5_evn
BOARD_FREQ=12
CPU_FREQ=100
FPGA_VARIANT=um5g-85k
FPGA_PACKAGE=CABGA381
# Expanded unquoted below, so a space-separated list in $1 splits into files.
VERILOGS=$1
yosys -q -DECP5_EVN -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ecp5 -abc9 -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit
# --force and --timing-allow-fail let place-and-route finish even when errors
# or timing violations are reported.
nextpnr-ecp5 --force --timing-allow-fail --json $PROJECTNAME.json --lpf BOARDS/$BOARD.lpf --textcfg $PROJECTNAME"_out".config --freq $BOARD_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE || exit
ecppack --compress --svf-rowsize 100000 --svf $PROJECTNAME".svf" $PROJECTNAME"_out.config" $PROJECTNAME".bit" || exit
# NOTE(review): ujprog is the same loader used by run_ulx3s.sh; confirm that it
# supports the programmer on the ECP5 EVN board.
ujprog -j FLASH $PROJECTNAME".bit" || exit

30
BOARDS/run_gowin.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/bin/bash
# Build the SOC design for the Tang Primer 20K and load it.
# Flow: yosys (synth_gowin) -> nextpnr-himbaechel -> gowin_pack -> openFPGALoader.
# Usage: BOARDS/run_gowin.sh "<verilog sources>"
# Run it from the directory that contains BOARDS/ (the .cst path is relative).
# Any failing step ends the script with exit status 1.

# --- Settings ---
top=SOC
part='GW2A-LV18PG256C8/I7'
board='tangprimer20k'
board_mhz=27
cpu_mhz=50
sources=$1
routed_json=${top}_pnr.json

# --- Step 1: synthesis (yosys) ---
# The yosys command list is kept in a variable; its text, including the
# leading newline, is what gets handed to -p.
synth_cmds="
read_verilog $sources;
synth_gowin -top $top -json $top.json -family gw2a"
yosys -q -DPRIMER20K -DBOARD_FREQ=$board_mhz -DCPU_FREQ=$cpu_mhz -D INV_BTN=0 -p "$synth_cmds" || exit 1

# --- Step 2: place and route (nextpnr-himbaechel) ---
nextpnr-himbaechel \
    --json $top.json \
    --write $routed_json \
    --device $part \
    --vopt cst=BOARDS/$board.cst \
    --vopt family=GW2A-18 \
    --freq $board_mhz || exit 1

# --- Step 3: bitstream packing (gowin_pack) ---
gowin_pack -d $part -o $top.fs $routed_json || exit 1

# --- Step 4: programming (openFPGALoader) ---
openFPGALoader -b $board $top.fs || exit 1

14
BOARDS/run_icebreaker.sh Executable file
View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Build the SOC design for the iCEBreaker (iCE40 up5k, sg48) and program it.
# Flow: yosys (synth_ice40) -> nextpnr-ice40 -> icetime -> icepack -> iceprog.
# Usage: BOARDS/run_icebreaker.sh "<verilog sources>"
# Run it from the directory that contains BOARDS/ (the .pcf path is relative).
# Steps followed by "|| exit" stop the script on failure with that step's status.
PROJECTNAME=SOC
BOARD=icebreaker
BOARD_FREQ=12
CPU_FREQ=20
FPGA_VARIANT=up5k
FPGA_PACKAGE=sg48
# Expanded unquoted below, so a space-separated list in $1 splits into files.
VERILOGS=$1
yosys -q -DICE_BREAKER -DNEGATIVE_RESET -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ice40 -abc9 -device u -dsp -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit
# NOTE(review): the timing target here is BOARD_FREQ, whereas run_icestick.sh
# passes CPU_FREQ to --freq - confirm which one is intended.
nextpnr-ice40 --force --json $PROJECTNAME.json --pcf BOARDS/$BOARD.pcf --asc $PROJECTNAME.asc --freq $BOARD_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE --pcf-allow-unconstrained || exit
# Timing report written to $PROJECTNAME.timings; a failure here does not stop the script.
icetime -p BOARDS/$BOARD.pcf -P $FPGA_PACKAGE -r $PROJECTNAME.timings -d up5k -t $PROJECTNAME.asc
icepack $PROJECTNAME.asc $PROJECTNAME.bin || exit
iceprog $PROJECTNAME.bin || exit
echo DONE.

14
BOARDS/run_icestick.sh Executable file
View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Build the SOC design for the IceStick (iCE40 hx1k, tq144) and program it.
# Flow: yosys (synth_ice40) -> nextpnr-ice40 -> icetime -> icepack -> iceprog.
# Usage: BOARDS/run_icestick.sh "<verilog sources>"
# Run it from the directory that contains BOARDS/ (the .pcf path is relative).
# Steps followed by "|| exit" stop the script on failure with that step's status.
PROJECTNAME=SOC
BOARD=icestick
BOARD_FREQ=12
CPU_FREQ=45
FPGA_VARIANT=hx1k
FPGA_PACKAGE=tq144
# Expanded unquoted below, so a space-separated list in $1 splits into files.
VERILOGS=$1
yosys -q -DICE_STICK -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ice40 -relut -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit
# The timing target is CPU_FREQ; --force and --timing-allow-fail let
# place-and-route finish even when errors or timing violations are reported.
nextpnr-ice40 --force --timing-allow-fail --json $PROJECTNAME.json --pcf BOARDS/$BOARD.pcf --asc $PROJECTNAME.asc --freq $CPU_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE --pcf-allow-unconstrained --opt-timing || exit
# Timing report written to $PROJECTNAME.timings; a failure here does not stop the script.
icetime -p BOARDS/$BOARD.pcf -P $FPGA_PACKAGE -r $PROJECTNAME.timings -d hx1k -t $PROJECTNAME.asc
icepack $PROJECTNAME.asc $PROJECTNAME.bin || exit
iceprog $PROJECTNAME.bin || exit
echo DONE.

9
BOARDS/run_icestick_show.sh Executable file
View File

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Synthesize the SOC design for the IceStick (iCE40 hx1k, tq144) and open the
# place-and-route step in the nextpnr GUI (--gui). Unlike run_icestick.sh this
# script does not run icetime, icepack or iceprog.
# Usage: BOARDS/run_icestick_show.sh "<verilog sources>"
# Run it from the directory that contains BOARDS/ (the .pcf path is relative).
PROJECTNAME=SOC
BOARD=icestick
BOARD_FREQ=12
CPU_FREQ=45
FPGA_VARIANT=hx1k
FPGA_PACKAGE=tq144
# Expanded unquoted below, so a space-separated list in $1 splits into files.
VERILOGS=$1
yosys -q -DICE_STICK -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ice40 -relut -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit
nextpnr-ice40 --gui --force --timing-allow-fail --json $PROJECTNAME.json --pcf BOARDS/$BOARD.pcf --asc $PROJECTNAME.asc --freq $CPU_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE --pcf-allow-unconstrained --opt-timing || exit

13
BOARDS/run_ulx3s.sh Executable file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Build the SOC design for the ULX3S (ECP5 85k, CABGA381) and program it.
# Flow: yosys (synth_ecp5) -> nextpnr-ecp5 -> ecppack -> ujprog.
# Usage: BOARDS/run_ulx3s.sh "<verilog sources>"
# Run it from the directory that contains BOARDS/ (the .lpf path is relative).
# Each step is followed by "|| exit", so the script stops at the first failure
# and returns that step's exit status.
PROJECTNAME=SOC
BOARD=ulx3s
BOARD_FREQ=25
CPU_FREQ=100
FPGA_VARIANT=85k
FPGA_PACKAGE=CABGA381
# Expanded unquoted below, so a space-separated list in $1 splits into files.
VERILOGS=$1
yosys -q -DULX3S -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ecp5 -abc9 -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit
# --force and --timing-allow-fail let place-and-route finish even when errors
# or timing violations are reported.
nextpnr-ecp5 --force --timing-allow-fail --json $PROJECTNAME.json --lpf BOARDS/$BOARD.lpf --textcfg $PROJECTNAME"_out".config --freq $BOARD_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE || exit
ecppack --compress --svf-rowsize 100000 --svf $PROJECTNAME".svf" $PROJECTNAME"_out.config" $PROJECTNAME".bit" || exit
ujprog -j FLASH $PROJECTNAME".bit" || exit

143
BOARDS/tangprimer20k.cst Normal file
View File

@@ -0,0 +1,143 @@
// Pin constraints for the Tang Primer 20K build. BOARDS/run_gowin.sh passes
// this file to nextpnr-himbaechel (--vopt cst=BOARDS/tangprimer20k.cst).
// Format: IO_LOC "<port>" <pin>;  IO_PORT "<port>" <attributes>;
// Several package pins appear under more than one port name (for example A15
// is used by "TXD", "led[6]" and "LCD_HYNC"; C13 by "led[0]", "LED_R",
// "oser_out" and "div_led"), so the file reads as a catalogue of alternative
// port names rather than one consistent pinout.
// NOTE(review): presumably only entries whose port exists in the top-level
// design take effect - confirm how the tool treats the others.
// NOTE(review): the clock, LED and reset entries use lower-case names ("clk",
// "led[n]", "rst_i") while the other BOARDS/ constraint files use CLK,
// LEDS[n] and RESET; verify that the SOC top-level ports are constrained here.
// clock, key and reset inputs
IO_LOC "clk" H11;
IO_PORT "clk" IO_TYPE=LVCMOS33;
IO_LOC "key_i" T3;
IO_LOC "rst_i" T10;
IO_PORT "rst_i" IO_TYPE=LVCMOS33;
IO_LOC "clk_i" IOT27A;
// LEDs 0-5
IO_LOC "led[0]" C13;
IO_PORT "led[0]" IO_TYPE=LVCMOS33;
IO_LOC "led[1]" A13;
IO_PORT "led[1]" IO_TYPE=LVCMOS33;
IO_LOC "led[2]" N16;
IO_PORT "led[2]" IO_TYPE=LVCMOS33;
IO_LOC "led[3]" N14;
IO_PORT "led[3]" IO_TYPE=LVCMOS33;
IO_LOC "led[4]" L14;
IO_PORT "led[4]" IO_TYPE=LVCMOS33;
IO_LOC "led[5]" L16;
IO_PORT "led[5]" IO_TYPE=LVCMOS33;
// UART (pull-ups enabled on both pins)
IO_LOC "TXD" A15;
IO_PORT "TXD" IO_TYPE=LVCMOS33 PULL_MODE=UP;
IO_LOC "RXD" D14;
IO_PORT "RXD" IO_TYPE=LVCMOS33 PULL_MODE=UP;
// fake: led[6] and led[7] reuse the UART pins A15 and D14
IO_LOC "led[6]" A15;
IO_PORT "led[6]" IO_TYPE=LVCMOS33 PULL_MODE=NONE;
IO_LOC "led[7]" D14;
IO_PORT "led[7]" IO_TYPE=LVCMOS33 PULL_MODE=NONE;
// LVDS pairs (LVDS25)
IO_LOC "tlvds_p" P6;
IO_PORT "tlvds_p" IO_TYPE=LVDS25 PULL_MODE=NONE;
IO_LOC "tlvds_n" T6;
IO_PORT "tlvds_n" IO_TYPE=LVDS25 PULL_MODE=NONE;
IO_LOC "elvds_p" C12;
IO_PORT "elvds_p" IO_TYPE=LVDS25 PULL_MODE=NONE;
IO_LOC "elvds_n" B12;
IO_PORT "elvds_n" IO_TYPE=LVDS25 PULL_MODE=NONE;
// LED_R / LED_G / LED_B: same pins as led[0], led[1] and led[2]
IO_LOC "LED_R" C13;
IO_PORT "LED_R" IO_TYPE=LVCMOS33;
IO_LOC "LED_G" A13;
IO_PORT "LED_G" IO_TYPE=LVCMOS33;
IO_LOC "LED_B" N16;
IO_PORT "LED_B" IO_TYPE=LVCMOS33;
// oser
IO_LOC "oser_out" C13;
IO_PORT "oser_out" IO_TYPE=LVCMOS33;
IO_LOC "fclk_o" N16;
IO_PORT "fclk_o" IO_TYPE=LVCMOS33;
IO_LOC "pclk_o" N14;
IO_PORT "pclk_o" IO_TYPE=LVCMOS33;
// ides
IO_LOC "fclk_i" B13;
IO_PORT "fclk_i" IO_TYPE=LVCMOS33;
IO_LOC "data_i" C12;
IO_PORT "data_i" IO_TYPE=LVCMOS33;
IO_LOC "q_o[0]" P9;
IO_PORT "q_o[0]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[1]" E15;
IO_PORT "q_o[1]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[2]" T7;
IO_PORT "q_o[2]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[3]" R8;
IO_PORT "q_o[3]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[4]" T6;
IO_PORT "q_o[4]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[5]" P6;
IO_PORT "q_o[5]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[6]" T8;
IO_PORT "q_o[6]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[7]" P8;
IO_PORT "q_o[7]" IO_TYPE=LVCMOS33;
// RGB LCD (5 red, 6 green and 5 blue data bits plus clock/sync/enable)
IO_LOC "LCD_CLK" R9;
IO_PORT "LCD_CLK" IO_TYPE=LVCMOS33;
IO_LOC "LCD_HYNC" A15;
IO_PORT "LCD_HYNC" IO_TYPE=LVCMOS33;
IO_LOC "LCD_SYNC" D14;
IO_PORT "LCD_SYNC" IO_TYPE=LVCMOS33;
IO_LOC "LCD_DEN" E15;
IO_PORT "LCD_DEN" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[0]" L9;
IO_PORT "LCD_R[0]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[1]" N8;
IO_PORT "LCD_R[1]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[2]" N9;
IO_PORT "LCD_R[2]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[3]" N7;
IO_PORT "LCD_R[3]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[4]" N6;
IO_PORT "LCD_R[4]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[0]" D11;
IO_PORT "LCD_G[0]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[1]" A11;
IO_PORT "LCD_G[1]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[2]" B11;
IO_PORT "LCD_G[2]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[3]" P7;
IO_PORT "LCD_G[3]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[4]" R7;
IO_PORT "LCD_G[4]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[5]" D10;
IO_PORT "LCD_G[5]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[0]" B12;
IO_PORT "LCD_B[0]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[1]" C12;
IO_PORT "LCD_B[1]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[2]" B13;
IO_PORT "LCD_B[2]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[3]" A14;
IO_PORT "LCD_B[3]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[4]" B14;
IO_PORT "LCD_B[4]" IO_TYPE=LVCMOS33;
// DVI (TMDS clock and three data pairs; no IO_TYPE is given for these ports)
IO_LOC "tmds_clk_p" G16;
IO_PORT "tmds_clk_p" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_clk_n" H15;
IO_PORT "tmds_clk_n" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_p[0]" H14;
IO_PORT "tmds_d_p[0]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_n[0]" H16;
IO_PORT "tmds_d_n[0]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_p[1]" J15;
IO_PORT "tmds_d_p[1]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_n[1]" K16;
IO_PORT "tmds_d_n[1]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_p[2]" K14;
IO_PORT "tmds_d_p[2]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_n[2]" K15;
IO_PORT "tmds_d_n[2]" PULL_MODE=NONE DRIVE=3.5;
// div_led: same pin as led[0]
IO_LOC "div_led" C13;
IO_PORT "div_led" IO_TYPE=LVCMOS33;

35
BOARDS/ulx3s.lpf Normal file
View File

@@ -0,0 +1,35 @@
# Pin constraints for the ULX3S build. BOARDS/run_ulx3s.sh passes this file
# to nextpnr-ecp5 (--lpf BOARDS/ulx3s.lpf).
# Constrained top-level ports: CLK, RESET, LEDS[0..4], TXD, RXD.
# See https://github.com/emard/ulx3s/blob/master/doc/constraints/ulx3s_v20.lpf
## Clock #########################################
# 25 MHz input clock; agrees with BOARD_FREQ=25 in BOARDS/run_ulx3s.sh.
LOCATE COMP "CLK" SITE "G2";
IOBUF PORT "CLK" PULLMODE=NONE IO_TYPE=LVCMOS33;
FREQUENCY PORT "CLK" 25 MHZ;
## RESET button ##################################
LOCATE COMP "RESET" SITE "T1"; # fire 2
IOBUF PORT "RESET" IO_TYPE=LVCMOS33;
## LEDs ##########################################
LOCATE COMP "LEDS[0]" SITE "B2";
LOCATE COMP "LEDS[1]" SITE "C2";
LOCATE COMP "LEDS[2]" SITE "C1";
LOCATE COMP "LEDS[3]" SITE "D2";
LOCATE COMP "LEDS[4]" SITE "D1";
IOBUF PORT "LEDS[0]" IO_TYPE=LVCMOS33;
IOBUF PORT "LEDS[1]" IO_TYPE=LVCMOS33;
IOBUF PORT "LEDS[2]" IO_TYPE=LVCMOS33;
IOBUF PORT "LEDS[3]" IO_TYPE=LVCMOS33;
IOBUF PORT "LEDS[4]" IO_TYPE=LVCMOS33;
## UART ######################################################
# Both UART pins enable the pull-up; TXD additionally sets DRIVE=4.
LOCATE COMP "TXD" SITE "L4"; # FPGA transmits to ftdi
LOCATE COMP "RXD" SITE "M1"; # FPGA receives from ftdi
IOBUF PORT "TXD" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "RXD" PULLMODE=UP IO_TYPE=LVCMOS33;

View File

@@ -0,0 +1,595 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
/*
Topic: Description
Benchmark using a linked list.
Linked list is a common data structure used in many applications.
For our purposes, this will exercise the memory units of the processor.
In particular, usage of the list pointers to find and alter data.
We are not using Malloc since some platforms do not support this
library.
Instead, the memory block being passed in is used to create a list,
and the benchmark takes care not to add more items than can be
accommodated by the memory block. The porting layer will make sure
that we have a valid memory block.
All operations are done in place, without using any extra memory.
The list itself contains list pointers and pointers to data items.
Data items contain the following:
idx - An index that captures the initial order of the list.
data - Variable data initialized based on the input parameters. The 16b
are divided as follows:
o Upper 8b are backup of original data.
o Bit 7 indicates if the lower 7 bits are to be used as is or calculated.
o Bits 0-2 indicate type of operation to perform to get a 7b value.
o Bits 3-6 provide input for the operation.
*/
/* local functions */
/* Prototypes for the list helpers defined later in this file. */
list_head *core_list_find(list_head *list, list_data *info);
list_head *core_list_reverse(list_head *list);
list_head *core_list_remove(list_head *item);
list_head *core_list_undo_remove(list_head *item_removed,
list_head *item_modified);
list_head *core_list_insert_new(list_head * insert_point,
list_data * info,
list_head **memblock,
list_data **datablock,
list_head * memblock_end,
list_data * datablock_end);
/* Comparison callback used by core_list_mergesort: receives the data of two
list cells plus the res pointer that was given to the sort; the sort takes
the element from the first list when the result is <= 0. */
typedef ee_s32 (*list_cmp)(list_data *a, list_data *b, core_results *res);
list_head *core_list_mergesort(list_head * list,
list_cmp cmp,
core_results *res);
/* Function: calc_func
Return the 7b value associated with a list data word, computing and
caching it on first use.
Parameters:
pdata - pointer to the 16b data word; rewritten in place when a value is
computed (upper 8b kept, bit 7 set, lower 7b = result).
res - benchmark state passed on to the sub-benchmarks; res->crc is
updated, and res->crcstate / res->crcmatrix are recorded while they are
still 0.
Returns:
The lower 7 bits of the cached or freshly computed value.
*/
ee_s16
calc_func(ee_s16 *pdata, core_results *res)
{
ee_s16 data = *pdata;
ee_s16 retval;
ee_u8 optype
= (data >> 7)
& 1; /* bit 7 indicates if the function result has been cached */
if (optype) /* if cached, use cache */
return (data & 0x007f);
else
{ /* otherwise calculate and cache the result */
ee_s16 flag = data & 0x7; /* bits 0-2 is type of function to perform */
ee_s16 dtype
= ((data >> 3)
& 0xf); /* bits 3-6 is specific data for the operation */
dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
switch (flag)
{
case 0:
/* type 0: run the state benchmark */
if (dtype < 0x22) /* set min period for bit corruption */
dtype = 0x22;
retval = core_bench_state(res->size,
res->memblock[3],
res->seed1,
res->seed2,
dtype,
res->crc);
/* remember the first result while crcstate is still 0 */
if (res->crcstate == 0)
res->crcstate = retval;
break;
case 1:
/* type 1: run the matrix benchmark */
retval = core_bench_matrix(&(res->mat), dtype, res->crc);
/* remember the first result while crcmatrix is still 0 */
if (res->crcmatrix == 0)
res->crcmatrix = retval;
break;
default:
/* every other type: use the data word itself */
retval = data;
break;
}
/* fold the full result into the running crc before truncating it */
res->crc = crcu16(retval, res->crc);
retval &= 0x007f;
*pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
return retval;
}
}
/* Function: cmp_complex
Compare the data item in a list cell.
Both data words are first run through <calc_func>, which may rewrite them
in place and update res.
Can be used by mergesort.
Returns:
Difference of the two calculated values (a minus b).
*/
ee_s32
cmp_complex(list_data *a, list_data *b, core_results *res)
{
ee_s16 val1 = calc_func(&(a->data16), res);
ee_s16 val2 = calc_func(&(b->data16), res);
return val1 - val2;
}
/* Function: cmp_idx
Compare the idx item in a list cell, and regen the data.
When res is NULL, the lower 8b of each data word are overwritten with a
copy of its upper 8b (the backup of the original data).
Can be used by mergesort.
Returns:
Difference of the two idx values (a minus b).
*/
ee_s32
cmp_idx(list_data *a, list_data *b, core_results *res)
{
if (res == NULL)
{
/* keep the upper byte and copy it down into the lower byte */
a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16 >> 8));
b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16 >> 8));
}
return a->idx - b->idx;
}
/* Function: copy_info
Copy the data16 and idx fields of one list data item into another.
Parameters:
to - destination item.
from - source item.
*/
void
copy_info(list_data *to, list_data *from)
{
to->data16 = from->data16;
to->idx = from->idx;
}
/* Benchmark for linked list:
- Try to find multiple data items.
- List sort
- Operate on data from list (crc)
- Single remove/reinsert
* At the end of this function, the list is back to original state
Parameters:
res - benchmark state; res->list is the list and res->seed3 the number of
find operations. res is also handed to the data sort.
finder_idx - starting idx for the searches; when negative the searches
are done by data value instead (see <core_list_find>), and the sort by
data content is only done when it is > 0.
Returns:
16b value accumulated from the find results and the list crcs.
*/
ee_u16
core_bench_list(core_results *res, ee_s16 finder_idx)
{
ee_u16 retval = 0;
ee_u16 found = 0, missed = 0;
list_head *list = res->list;
ee_s16 find_num = res->seed3;
list_head *this_find;
list_head *finder, *remover;
list_data info;
ee_s16 i;
info.idx = finder_idx;
/* find <find_num> values in the list, and change the list each time
* (reverse and cache if value found) */
for (i = 0; i < find_num; i++)
{
info.data16 = (i & 0xff);
this_find = core_list_find(list, &info);
list = core_list_reverse(list);
if (this_find == NULL)
{
missed++;
retval += (list->next->info->data16 >> 8) & 1;
}
else
{
found++;
if (this_find->info->data16 & 0x1) /* use found value */
retval += (this_find->info->data16 >> 9) & 1;
/* and cache next item at the head of the list (if any) */
if (this_find->next != NULL)
{
/* unlink the item after this_find and relink it right after the head */
finder = this_find->next;
this_find->next = finder->next;
finder->next = list->next;
list->next = finder;
}
}
/* when searching by idx, look for the next idx on the next pass */
if (info.idx >= 0)
info.idx++;
#if CORE_DEBUG
ee_printf("List find %d: [%d,%d,%d]\n", i, retval, missed, found);
#endif
}
retval += found * 4 - missed;
/* sort the list by data content and remove one item*/
if (finder_idx > 0)
list = core_list_mergesort(list, cmp_complex, res);
remover = core_list_remove(list->next);
/* CRC data content of list from location of index N forward, and then undo
* remove */
finder = core_list_find(list, &info);
if (!finder)
finder = list->next;
/* NOTE(review): the loop walks finder but feeds list->info->data16 (the
 * head item) to crc16 on every step, so finder only sets the number of
 * steps. Any change here would alter the returned value, which callers fold
 * into the result crc - re-validate the expected crcs before touching it. */
while (finder)
{
retval = crc16(list->info->data16, retval);
finder = finder->next;
}
#if CORE_DEBUG
ee_printf("List sort 1: %04x\n", retval);
#endif
remover = core_list_undo_remove(remover, list->next);
/* sort the list by index, in effect returning the list to original state */
list = core_list_mergesort(list, cmp_idx, NULL);
/* CRC data content of list */
/* (same pattern as above: list->info is read, finder counts the steps) */
finder = list->next;
while (finder)
{
retval = crc16(list->info->data16, retval);
finder = finder->next;
}
#if CORE_DEBUG
ee_printf("List sort 2: %04x\n", retval);
#endif
return retval;
}
/* Function: core_list_init
Initialize list with data.
The memory block is split into an area of list headers followed by an area
of data items; a head item and a tail item are created first, then the
remaining items are inserted after the head, indexed and sorted by idx.
Parameters:
blksize - Size of memory to be initialized.
memblock - Pointer to memory block.
seed - Actual values chosen depend on the seed parameter.
The seed parameter MUST be supplied from a source that cannot be
determined at compile time
Returns:
Pointer to the head of the list.
*/
list_head *
core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed)
{
/* calculated pointers for the list */
ee_u32 per_item = 16 + sizeof(struct list_data_s);
ee_u32 size = (blksize / per_item)
- 2; /* to accommodate systems with 64b pointers, and make sure
same code is executed, set max list elements */
list_head *memblock_end = memblock + size;
list_data *datablock = (list_data *)(memblock_end);
list_data *datablock_end = datablock + size;
/* some useful variables */
ee_u32 i;
list_head *finder, *list = memblock;
list_data info;
/* create fake items for the list head and tail */
list->next = NULL;
list->info = datablock;
list->info->idx = 0x0000;
list->info->data16 = (ee_s16)0x8080;
memblock++;
datablock++;
/* tail item: idx 0x7fff, data 0xffff */
info.idx = 0x7fff;
info.data16 = (ee_s16)0xffff;
core_list_insert_new(
list, &info, &memblock, &datablock, memblock_end, datablock_end);
/* then insert size items */
/* (core_list_insert_new returns NULL once either area is full; the return
value is not checked here, so surplus insertions are simply dropped) */
for (i = 0; i < size; i++)
{
ee_u16 datpat = ((ee_u16)(seed ^ i) & 0xf);
ee_u16 dat
= (datpat << 3) | (i & 0x7); /* alternate between algorithms */
info.data16 = (dat << 8) | dat; /* fill the data with actual data and
upper bits with rebuild value */
core_list_insert_new(
list, &info, &memblock, &datablock, memblock_end, datablock_end);
}
/* and now index the list so we know initial seed order of the list */
/* (the walk stops before the last item, so the tail keeps idx 0x7fff) */
finder = list->next;
i = 1;
while (finder->next != NULL)
{
if (i < size / 5) /* first 20% of the list in order */
finder->info->idx = i++;
else
{
ee_u16 pat = (ee_u16)(i++ ^ seed); /* get a pseudo random number */
finder->info->idx = 0x3fff
& (((i & 0x07) << 8)
| pat); /* make sure the mixed items end up
after the ones in sequence */
}
finder = finder->next;
}
/* sort by idx; res == NULL also makes cmp_idx rebuild the lower data byte */
list = core_list_mergesort(list, cmp_idx, NULL);
#if CORE_DEBUG
ee_printf("Initialized list:\n");
finder = list;
while (finder)
{
ee_printf(
"[%04x,%04x]", finder->info->idx, (ee_u16)finder->info->data16);
finder = finder->next;
}
ee_printf("\n");
#endif
return list;
}
/* Function: core_list_insert_new
Insert a new item into the list, directly after insert_point.
The list header and the data item are taken from the two memory areas,
whose current positions are advanced by one element each.
Parameters:
insert_point - where to insert the item.
info - data for the cell (copied into the new data item).
memblock - pointer for the list header
datablock - pointer for the list data
memblock_end - end of region for list headers
datablock_end - end of region for list data
Returns:
Pointer to new item, or NULL when either region has no room left.
*/
list_head *
core_list_insert_new(list_head * insert_point,
list_data * info,
list_head **memblock,
list_data **datablock,
list_head * memblock_end,
list_data * datablock_end)
{
list_head *newitem;
/* refuse when taking one more element would reach the end of a region */
if ((*memblock + 1) >= memblock_end)
return NULL;
if ((*datablock + 1) >= datablock_end)
return NULL;
/* take the next free header and link it in after insert_point */
newitem = *memblock;
(*memblock)++;
newitem->next = insert_point->next;
insert_point->next = newitem;
/* take the next free data item and fill it from info */
newitem->info = *datablock;
(*datablock)++;
copy_info(newitem->info, info);
return newitem;
}
/* Function: core_list_remove
Remove an item from the list.
Operation:
For a singly linked list, remove by exchanging the data pointers of the
current cell and the next item, and unlinking the next item.
Note:
since there is always a fake item at the end of the list, no need to
check for NULL.
Parameters:
item - cell whose data is to be removed; it stays in the list and takes
over the data of the following cell.
Returns:
Removed item (the unlinked cell, now holding the data that item had).
*/
list_head *
core_list_remove(list_head *item)
{
list_data *tmp;
list_head *ret = item->next;
/* swap data pointers */
tmp = item->info;
item->info = ret->info;
ret->info = tmp;
/* and eliminate item */
item->next = item->next->next;
ret->next = NULL;
return ret;
}
/* Function: core_list_undo_remove
Undo a remove operation.
Operation:
Since we want each iteration of the benchmark to be exactly the same,
we need to be able to undo a remove.
Link the removed item back into the list directly after item_modified,
and switch the info items.
Parameters:
item_removed - Return value from the <core_list_remove>
item_modified - List item that was modified during <core_list_remove>
Returns:
The item that was linked back to the list.
*/
list_head *
core_list_undo_remove(list_head *item_removed, list_head *item_modified)
{
list_data *tmp;
/* swap data pointers */
tmp = item_removed->info;
item_removed->info = item_modified->info;
item_modified->info = tmp;
/* and insert item */
item_removed->next = item_modified->next;
item_modified->next = item_removed;
return item_removed;
}
/* Function: core_list_find
Find an item in the list
Operation:
Find an item by idx (when info->idx >= 0) or, when info->idx is negative,
by comparing the lower 8b of the data value with info->data16.
Parameters:
list - list head
info - idx or data to find
Returns:
Found item, or NULL if not found.
*/
list_head *
core_list_find(list_head *list, list_data *info)
{
if (info->idx >= 0)
{
/* search by idx */
while (list && (list->info->idx != info->idx))
list = list->next;
return list;
}
else
{
/* search by the lower byte of the data */
while (list && ((list->info->data16 & 0xff) != info->data16))
list = list->next;
return list;
}
}
/* Function: core_list_reverse
Reverse a list
Operation:
Rearrange the pointers so the list is reversed.
Parameters:
list - list head
Returns:
New head of the list (the former last item), or NULL if list was NULL.
*/
list_head *
core_list_reverse(list_head *list)
{
list_head *next = NULL, *tmp;
while (list)
{
/* point the current item at the already reversed part, then advance */
tmp = list->next;
list->next = next;
next = list;
list = tmp;
}
return next;
}
/* Function: core_list_mergesort
Sort the list in place without recursion.
Description:
Use mergesort, as for linked list this is a realistic solution.
Also, since this is aimed at embedded, care was taken to use iterative
rather than recursive algorithm. The sort can either return the list to
original order (by idx), or use the data item to invoke other
algorithms and change the order of the list.
Each pass merges neighbouring runs of insize items; insize doubles after
every pass until a pass needs at most one merge.
Parameters:
list - list to be sorted.
cmp - cmp function to use
res - passed through unchanged to every cmp call.
Returns:
New head of the list.
Note:
We have a special header for the list that will always be first,
but the algorithm could theoretically modify where the list starts.
NOTE(review): if list is NULL on entry, tail is still NULL when
`tail->next = NULL` executes, i.e. before the nmerges check that mentions
the empty list case; callers presumably never pass an empty list - confirm.
*/
list_head *
core_list_mergesort(list_head *list, list_cmp cmp, core_results *res)
{
list_head *p, *q, *e, *tail;
ee_s32 insize, nmerges, psize, qsize, i;
insize = 1;
while (1)
{
p = list;
list = NULL;
tail = NULL;
nmerges = 0; /* count number of merges we do in this pass */
while (p)
{
nmerges++; /* there exists a merge to be done */
/* step `insize' places along from p */
q = p;
psize = 0;
for (i = 0; i < insize; i++)
{
psize++;
q = q->next;
if (!q)
break;
}
/* if q hasn't fallen off end, we have two lists to merge */
qsize = insize;
/* now we have two lists; merge them */
while (psize > 0 || (qsize > 0 && q))
{
/* decide whether next element of merge comes from p or q */
if (psize == 0)
{
/* p is empty; e must come from q. */
e = q;
q = q->next;
qsize--;
}
else if (qsize == 0 || !q)
{
/* q is empty; e must come from p. */
e = p;
p = p->next;
psize--;
}
else if (cmp(p->info, q->info, res) <= 0)
{
/* First element of p is lower (or same); e must come from
* p. */
e = p;
p = p->next;
psize--;
}
else
{
/* First element of q is lower; e must come from q. */
e = q;
q = q->next;
qsize--;
}
/* add the next element to the merged list */
if (tail)
{
tail->next = e;
}
else
{
list = e;
}
tail = e;
}
/* now p has stepped `insize' places along, and q has too */
p = q;
}
tail->next = NULL;
/* If we have done only one merge, we're finished. */
if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
return list;
/* Otherwise repeat, merging lists twice the size */
insize *= 2;
}
#if COMPILER_REQUIRES_SORT_RETURN
return list;
#endif
}

View File

@@ -0,0 +1,451 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
/* File: core_main.c
This file contains the framework to acquire a block of memory, seed
initial parameters, run the benchmark and report the results.
*/
#include "coremark.h"
/* Function: iterate
Run the benchmark for a specified number of iterations.
Operation:
For each type of benchmarked algorithm:
a - Initialize the data block for the algorithm.
b - Execute the algorithm N times.
Returns:
NULL.
*/
/* Expected crc values for the list, matrix and state algorithms, five
entries each. NOTE(review): presumably one entry per known seed/size
configuration and used to validate results - the code that indexes these
tables is not visible here, confirm there. */
static ee_u16 list_known_crc[] = { (ee_u16)0xd4b0,
(ee_u16)0x3340,
(ee_u16)0x6a79,
(ee_u16)0xe714,
(ee_u16)0xe3c1 };
static ee_u16 matrix_known_crc[] = { (ee_u16)0xbe52,
(ee_u16)0x1199,
(ee_u16)0x5608,
(ee_u16)0x1fd7,
(ee_u16)0x0747 };
static ee_u16 state_known_crc[] = { (ee_u16)0x5e47,
(ee_u16)0x39bf,
(ee_u16)0xe5a4,
(ee_u16)0x8e3a,
(ee_u16)0x8d84 };
/* Runs the list benchmark res->iterations times.
pres points to a core_results structure; its crc, crclist, crcmatrix and
crcstate fields are reset to 0 before the loop. Always returns NULL. */
void *
iterate(void *pres)
{
ee_u32 i;
ee_u16 crc;
core_results *res = (core_results *)pres;
ee_u32 iterations = res->iterations;
res->crc = 0;
res->crclist = 0;
res->crcmatrix = 0;
res->crcstate = 0;
for (i = 0; i < iterations; i++)
{
/* one pass with finder_idx 1 and one with -1; each result is folded into
the running crc */
crc = core_bench_list(res, 1);
res->crc = crcu16(crc, res->crc);
crc = core_bench_list(res, -1);
res->crc = crcu16(crc, res->crc);
/* keep the crc reached after the first iteration as the list crc */
if (i == 0)
res->crclist = res->crc;
}
return NULL;
}
/* Seed access. get_seed(x) yields seed number x as ee_s16 and get_seed_32(x)
as ee_s32. With SEED_METHOD == SEED_ARG both expand to get_seed_args and
use the identifiers argc and argv from the calling scope. */
#if (SEED_METHOD == SEED_ARG)
ee_s32 get_seed_args(int i, int argc, char *argv[]);
#define get_seed(x) (ee_s16) get_seed_args(x, argc, argv)
#define get_seed_32(x) get_seed_args(x, argc, argv)
#else /* via function or volatile */
ee_s32 get_seed_32(int i);
#define get_seed(x) (ee_s16) get_seed_32(x)
#endif
/* Statically allocated benchmark memory, only present for MEM_STATIC. */
#if (MEM_METHOD == MEM_STATIC)
ee_u8 static_memblk[TOTAL_DATA_SIZE];
#endif
/* Display names for three memory methods. NOTE(review): presumably indexed
by MEM_METHOD - confirm at the point of use. */
char *mem_name[3] = { "Static", "Heap", "Stack" };
/* Function: main
Main entry routine for the benchmark.
This function is responsible for the following steps:
1 - Initialize input seeds from a source that cannot be determined at
compile time.
2 - Initialize memory block for use.
3 - Run and time the benchmark.
4 - Report results, testing the validity of the output if the seeds are
known.
Arguments:
1 - first seed : Any value
2 - second seed : Must be identical to first for iterations to be
identical
3 - third seed : Any value, should be at least an order of magnitude less
than the input size, but bigger than 32.
4 - Iterations : Special, if set to 0, iterations will be automatically
determined such that the benchmark will run between 10 to 100 secs
*/
/* The function header is selected by the preprocessor. When the platform
   cannot pass arguments to main, local argc/argv stand-ins are declared so
   that the code below (portable_init and the get_seed macros) still
   compiles. */
#if MAIN_HAS_NOARGC
MAIN_RETURN_TYPE
main(void)
{
int argc = 0;
char *argv[1];
#else
MAIN_RETURN_TYPE
main(int argc, char *argv[])
{
#endif
/* j counts the memory slices handed out so far; known_id selects an entry
   of the *_known_crc tables (-1 = seeds not recognised). */
ee_u16 i, j = 0, num_algorithms = 0;
ee_s16 known_id = -1, total_errors = 0;
ee_u16 seedcrc = 0;
CORE_TICKS total_time;
core_results results[MULTITHREAD];
#if (MEM_METHOD == MEM_STACK)
ee_u8 stack_memblock[TOTAL_DATA_SIZE * MULTITHREAD];
#endif
/* first call any initializations needed */
portable_init(&(results[0].port), &argc, argv);
/* First some checks to make sure benchmark will run ok */
if (sizeof(struct list_head_s) > 128)
{
ee_printf("list_head structure too big for comparable data!\n");
return MAIN_RETURN_VAL;
}
/* Seeds 1-3 are 16-bit values, seed 4 is the iteration count and seed 5 is
   the mask of algorithms to execute. */
results[0].seed1 = get_seed(1);
results[0].seed2 = get_seed(2);
results[0].seed3 = get_seed(3);
results[0].iterations = get_seed_32(4);
#if CORE_DEBUG
results[0].iterations = 1;
#endif
results[0].execs = get_seed_32(5);
if (results[0].execs == 0)
{ /* if not supplied, execute all algorithms */
results[0].execs = ALL_ALGORITHMS_MASK;
}
/* put in some default values based on one seed only for easy testing */
if ((results[0].seed1 == 0) && (results[0].seed2 == 0)
&& (results[0].seed3 == 0))
{ /* performance run */
results[0].seed1 = 0;
results[0].seed2 = 0;
results[0].seed3 = 0x66;
}
if ((results[0].seed1 == 1) && (results[0].seed2 == 0)
&& (results[0].seed3 == 0))
{ /* validation run */
results[0].seed1 = 0x3415;
results[0].seed2 = 0x3415;
results[0].seed3 = 0x66;
}
/* Attach a data block to every context. In the multi-context variants the
   seeds and algorithm mask of context 0 are copied to all contexts. */
#if (MEM_METHOD == MEM_STATIC)
results[0].memblock[0] = (void *)static_memblk;
results[0].size = TOTAL_DATA_SIZE;
results[0].err = 0;
#if (MULTITHREAD > 1)
#error "Cannot use a static data area with multiple contexts!"
#endif
#elif (MEM_METHOD == MEM_MALLOC)
for (i = 0; i < MULTITHREAD; i++)
{
/* seed 7, when non-zero, overrides the size of the allocated block */
ee_s32 malloc_override = get_seed(7);
if (malloc_override != 0)
results[i].size = malloc_override;
else
results[i].size = TOTAL_DATA_SIZE;
/* NOTE(review): the result of portable_malloc is used without a NULL
   check. */
results[i].memblock[0] = portable_malloc(results[i].size);
results[i].seed1 = results[0].seed1;
results[i].seed2 = results[0].seed2;
results[i].seed3 = results[0].seed3;
results[i].err = 0;
results[i].execs = results[0].execs;
}
#elif (MEM_METHOD == MEM_STACK)
for (i = 0; i < MULTITHREAD; i++)
{
/* each context gets its own TOTAL_DATA_SIZE slice of the stack buffer */
results[i].memblock[0] = stack_memblock + i * TOTAL_DATA_SIZE;
results[i].size = TOTAL_DATA_SIZE;
results[i].seed1 = results[0].seed1;
results[i].seed2 = results[0].seed2;
results[i].seed3 = results[0].seed3;
results[i].err = 0;
results[i].execs = results[0].execs;
}
#else
#error "Please define a way to initialize a memory block."
#endif
/* Data init */
/* Find out how space much we have based on number of algorithms */
for (i = 0; i < NUM_ALGORITHMS; i++)
{
if ((1 << (ee_u32)i) & results[0].execs)
num_algorithms++;
}
/* From here on, size is the per-algorithm share of the block.
   NOTE(review): if none of the low NUM_ALGORITHMS bits is set in execs,
   num_algorithms is still 0 here and this divides by zero. */
for (i = 0; i < MULTITHREAD; i++)
results[i].size = results[i].size / num_algorithms;
/* Assign pointers */
/* memblock[i + 1] is the slice for algorithm bit i; slices are handed out
   consecutively (j) to the enabled algorithms only. */
for (i = 0; i < NUM_ALGORITHMS; i++)
{
ee_u32 ctx;
if ((1 << (ee_u32)i) & results[0].execs)
{
for (ctx = 0; ctx < MULTITHREAD; ctx++)
results[ctx].memblock[i + 1]
= (char *)(results[ctx].memblock[0]) + results[0].size * j;
j++;
}
}
/* call inits */
for (i = 0; i < MULTITHREAD; i++)
{
if (results[i].execs & ID_LIST)
{
results[i].list = core_list_init(
results[0].size, results[i].memblock[1], results[i].seed1);
}
if (results[i].execs & ID_MATRIX)
{
/* the matrix seed is built from seed1 (low half) and seed2 (high half) */
core_init_matrix(results[0].size,
results[i].memblock[2],
(ee_s32)results[i].seed1
| (((ee_s32)results[i].seed2) << 16),
&(results[i].mat));
}
if (results[i].execs & ID_STATE)
{
core_init_state(
results[0].size, results[i].seed1, results[i].memblock[3]);
}
}
/* automatically determine number of iterations if not set */
if (results[0].iterations == 0)
{
secs_ret secs_passed = 0;
ee_u32 divisor;
results[0].iterations = 1;
/* grow the iteration count tenfold until one timed run takes >= 1 sec */
while (secs_passed < (secs_ret)1)
{
results[0].iterations *= 10;
start_time();
iterate(&results[0]);
stop_time();
secs_passed = time_in_secs(get_time());
}
/* now we know it executes for at least 1 sec, set actual run time at
* about 10 secs */
divisor = (ee_u32)secs_passed;
if (divisor == 0) /* some machines cast float to int as 0 since this
conversion is not defined by ANSI, but we know at
least one second passed */
divisor = 1;
results[0].iterations *= 1 + 10 / divisor;
}
/* perform actual benchmark */
start_time();
#if (MULTITHREAD > 1)
if (default_num_contexts > MULTITHREAD)
{
default_num_contexts = MULTITHREAD;
}
for (i = 0; i < default_num_contexts; i++)
{
results[i].iterations = results[0].iterations;
results[i].execs = results[0].execs;
core_start_parallel(&results[i]);
}
for (i = 0; i < default_num_contexts; i++)
{
core_stop_parallel(&results[i]);
}
#else
iterate(&results[0]);
#endif
stop_time();
total_time = get_time();
/* get a function of the input to report */
seedcrc = crc16(results[0].seed1, seedcrc);
seedcrc = crc16(results[0].seed2, seedcrc);
seedcrc = crc16(results[0].seed3, seedcrc);
seedcrc = crc16(results[0].size, seedcrc);
/* seedcrc identifies the seed/size combination; a recognised value picks
   the matching row of the *_known_crc tables via known_id. */
switch (seedcrc)
{ /* test known output for common seeds */
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
known_id = 0;
ee_printf("6k performance run parameters for coremark.\n");
break;
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per
algorithm */
known_id = 1;
ee_printf("6k validation run parameters for coremark.\n");
break;
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm
*/
known_id = 2;
ee_printf("Profile generation run parameters for coremark.\n");
break;
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
known_id = 3;
ee_printf("2K performance run parameters for coremark.\n");
break;
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per
algorithm */
known_id = 4;
ee_printf("2K validation run parameters for coremark.\n");
break;
default:
/* unknown seeds: start from -1 so the "cannot validate" message is
   printed unless later checks add errors */
total_errors = -1;
break;
}
if (known_id >= 0)
{
/* compare each enabled algorithm's CRC with the expected value */
for (i = 0; i < default_num_contexts; i++)
{
results[i].err = 0;
if ((results[i].execs & ID_LIST)
&& (results[i].crclist != list_known_crc[known_id]))
{
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",
i,
results[i].crclist,
list_known_crc[known_id]);
results[i].err++;
}
if ((results[i].execs & ID_MATRIX)
&& (results[i].crcmatrix != matrix_known_crc[known_id]))
{
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",
i,
results[i].crcmatrix,
matrix_known_crc[known_id]);
results[i].err++;
}
if ((results[i].execs & ID_STATE)
&& (results[i].crcstate != state_known_crc[known_id]))
{
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",
i,
results[i].crcstate,
state_known_crc[known_id]);
results[i].err++;
}
total_errors += results[i].err;
}
}
total_errors += check_data_types();
/* and report results */
ee_printf("CoreMark Size : %lu\n", (long unsigned)results[0].size);
ee_printf("Total ticks : %lu\n", (long unsigned)total_time);
#if HAS_FLOAT
ee_printf("Total time (secs): %f\n", time_in_secs(total_time));
if (time_in_secs(total_time) > 0)
ee_printf("Iterations/Sec : %f\n",
(default_num_contexts * results[0].iterations)
/ time_in_secs(total_time));
#else
/*
ee_printf("Total time (secs): %d\n", time_in_secs(total_time));
if (time_in_secs(total_time) > 0)
ee_printf("Iterations/Sec : %d\n",
default_num_contexts * results[0].iterations
/ time_in_secs(total_time));
*/
#endif
/* port-specific score report, printed regardless of HAS_FLOAT */
print_coremarks(total_time);
if (time_in_secs(total_time) < 10)
{
/* The message is printed, but with the increment below commented out a
   short run is not counted as an error. */
ee_printf(
"ERROR! Must execute for at least 10 secs for a valid result!\n");
// total_errors++;
}
/*
ee_printf("Iterations : %lu\n",
(long unsigned)default_num_contexts * results[0].iterations);
ee_printf("Compiler version : %s\n", COMPILER_VERSION);
ee_printf("Compiler flags : %s\n", COMPILER_FLAGS);
*/
#if (MULTITHREAD > 1)
ee_printf("Parallel %s : %d\n", PARALLEL_METHOD, default_num_contexts);
#endif
ee_printf("Memory location : %s\n", MEM_LOCATION);
/* output for verification */
ee_printf("seedcrc : 0x%04x\n", seedcrc);
if (results[0].execs & ID_LIST)
for (i = 0; i < default_num_contexts; i++)
ee_printf("[%d]crclist : 0x%04x\n", i, results[i].crclist);
if (results[0].execs & ID_MATRIX)
for (i = 0; i < default_num_contexts; i++)
ee_printf("[%d]crcmatrix : 0x%04x\n", i, results[i].crcmatrix);
if (results[0].execs & ID_STATE)
for (i = 0; i < default_num_contexts; i++)
ee_printf("[%d]crcstate : 0x%04x\n", i, results[i].crcstate);
for (i = 0; i < default_num_contexts; i++)
ee_printf("[%d]crcfinal : 0x%04x\n", i, results[i].crc);
/* Verdict: 0 = validated, > 0 = CRC or data-type errors, < 0 = seeds not
   recognised so nothing could be compared. */
if (total_errors == 0)
{
ee_printf(
"Correct operation validated. See README.md for run and reporting "
"rules.\n");
#if HAS_FLOAT
/* the one-line score is only printed for known_id 3 */
if (known_id == 3)
{
ee_printf("CoreMark 1.0 : %f / %s %s",
default_num_contexts * results[0].iterations
/ time_in_secs(total_time),
COMPILER_VERSION,
COMPILER_FLAGS);
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
ee_printf(" / %s", MEM_LOCATION);
#else
ee_printf(" / %s", mem_name[MEM_METHOD]);
#endif
#if (MULTITHREAD > 1)
ee_printf(" / %d:%s", default_num_contexts, PARALLEL_METHOD);
#endif
ee_printf("\n");
}
#endif
}
if (total_errors > 0)
ee_printf("Errors detected\n");
if (total_errors < 0)
ee_printf(
"Cannot validate operation for these seed values, please compare "
"with results on a known platform.\n");
#if (MEM_METHOD == MEM_MALLOC)
for (i = 0; i < MULTITHREAD; i++)
portable_free(results[i].memblock[0]);
#endif
/* And last call any target specific code for finalizing */
portable_fini(&(results[0].port));
return MAIN_RETURN_VAL;
}

View File

@@ -0,0 +1,359 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
/*
Topic: Description
Matrix manipulation benchmark
This very simple algorithm forms the basis of many more complex
algorithms.
The tight inner loop is the focus of many optimizations (compiler as
well as hardware based) and is thus relevant for embedded processing.
The total available data space will be divided to 3 parts:
NxN Matrix A - initialized with small values (upper 3/4 of the bits all
zero).
NxN Matrix B - initialized with medium values (upper half of the bits all
zero).
NxN Matrix C - used for the result.
The actual values for A and B must be derived based on input that is not
available at compile time.
*/
/* Forward declarations of the matrix kernels defined later in this file. */
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val);
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval);
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val);
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val);
/* matrix_test_next: evaluates to x + 1 (not referenced in the part of this
   file shown here). */
#define matrix_test_next(x) (x + 1)
/* matrix_clip: keep the low 8 bits of x when y is non-zero, otherwise keep
   the low 16 bits. */
#define matrix_clip(x, y) ((y) ? (x)&0x0ff : (x)&0x0ffff)
/* matrix_big: OR 0xf000 into x. */
#define matrix_big(x) (0xf000 | (x))
/* bit_extract: shift x right by `from`, then keep the low `to` bits. Note
   that `to` therefore acts as a field width, not as an end bit position. */
#define bit_extract(x, from, to) (((x) >> (from)) & (~(0xffffffff << (to))))
#if CORE_DEBUG
/* Debug helper: print the N x N matrix A (MATDAT elements) under the given
   name, one comma-separated row per line. */
void
printmat(MATDAT *A, ee_u32 N, char *name)
{
    ee_u32 row = 0;

    ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
    while (row < N)
    {
        ee_u32 col = 0;
        while (col < N)
        {
            if (col > 0)
            {
                ee_printf(",");
            }
            ee_printf("%d", A[row * N + col]);
            col++;
        }
        ee_printf("\n");
        row++;
    }
}
/* Debug helper: same output format as printmat, for the result matrix C
   (MATRES elements). */
void
printmatC(MATRES *C, ee_u32 N, char *name)
{
    ee_u32 row = 0;

    ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
    while (row < N)
    {
        ee_u32 col = 0;
        while (col < N)
        {
            if (col > 0)
            {
                ee_printf(",");
            }
            ee_printf("%d", C[row * N + col]);
            col++;
        }
        ee_printf("\n");
        row++;
    }
}
#endif
/* Function: core_bench_matrix
        Benchmark entry point for the matrix algorithm.

        Runs <matrix_test> once on the matrices described by p, using seed
        (converted to MATDAT) as the constant, and folds the returned value
        into crc with crc16.

        Returns:
        The updated crc.
*/
ee_u16
core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc)
{
    ee_s16 test_result = matrix_test(p->N, p->C, p->A, p->B, (MATDAT)seed);

    return crc16(test_result, crc);
}
/* Function: matrix_test
        Run the full sequence of matrix operations once.

        Parameters:
        N - dimension of the (square) matrices.
        C - memory for the result matrix.
        A - input matrix; modified during the run and restored at the end.
        B - operator matrix (only read).
        val - constant used for the add/multiply steps.

        Returns:
        A CRC accumulated over the <matrix_sum> of the result matrix after
        each multiply step.

        Sequence:
        1 - add val to every element of A.
        2 - C = A * val            (element-wise), sum, crc.
        3 - C = A * B as a vector, sum, crc.
        4 - C = A * B,             sum, crc.
        5 - C = A * B with bit extraction, sum, crc.
        6 - subtract val from every element of A again.
*/
ee_s16
matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val)
{
    ee_u16 running = 0;
    MATDAT limit   = matrix_big(val);

    /* perturb the input so the data changes between calls */
    matrix_add_const(N, A, val);
#if CORE_DEBUG
    printmat(A, N, "matrix_add_const");
#endif

    matrix_mul_const(N, C, A, val);
    running = crc16(matrix_sum(N, C, limit), running);
#if CORE_DEBUG
    printmatC(C, N, "matrix_mul_const");
#endif

    matrix_mul_vect(N, C, A, B);
    running = crc16(matrix_sum(N, C, limit), running);
#if CORE_DEBUG
    printmatC(C, N, "matrix_mul_vect");
#endif

    matrix_mul_matrix(N, C, A, B);
    running = crc16(matrix_sum(N, C, limit), running);
#if CORE_DEBUG
    printmatC(C, N, "matrix_mul_matrix");
#endif

    matrix_mul_matrix_bitextract(N, C, A, B);
    running = crc16(matrix_sum(N, C, limit), running);
#if CORE_DEBUG
    printmatC(C, N, "matrix_mul_matrix_bitextract");
#endif

    /* undo the initial perturbation of A */
    matrix_add_const(N, A, -val);
    return running;
}
/* Function : core_init_matrix
        Initialize the memory block for matrix benchmarking.

        Parameters:
        blksize - Size of memory to be initialized.
        memblk - Pointer to memory block.
        seed - Actual values chosen depend on the seed parameter. A seed of
   0 is replaced by 1.
        p - pointer to the <mat_params> that receives the initialized
   matrix pointers and the dimension.

        Returns:
        Matrix dimensions (N). N is 0 when blksize is too small to hold any
   matrix, including blksize == 0.

        Note:
        The seed parameter MUST be supplied from a source that cannot be
   determined at compile time
*/
ee_u32
core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p)
{
    ee_u32  N = 0;
    MATDAT *A;
    MATDAT *B;
    ee_s32  order = 1;
    MATDAT  val;
    ee_u32  i = 0, j = 0;

    if (seed == 0)
        seed = 1;
    /* Find the first i for which i*i*2*4 reaches blksize; the dimension is
       one less than that. (The 2*4 factor presumably is the per-element
       byte cost of A, B and C together - element sizes are not visible
       here.) */
    while (j < blksize)
    {
        i++;
        j = i * i * 2 * 4;
    }
    /* With blksize == 0 the loop never runs and i stays 0; "i - 1" would
       then wrap to 0xFFFFFFFF and the fill loops below would write far
       outside the block. Clamp to an empty matrix instead. */
    N = (i > 0) ? (i - 1) : 0;
    A = (MATDAT *)align_mem(memblk);
    B = A + N * N;

    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
        {
            /* seed and order together generate the element values: B gets
               a value clipped to 16 bits, A a value clipped to 8 bits */
            seed         = ((order * seed) % 65536);
            val          = (seed + order);
            val          = matrix_clip(val, 0);
            B[i * N + j] = val;
            val          = (val + order);
            val          = matrix_clip(val, 1);
            A[i * N + j] = val;
            order++;
        }
    }

    p->A = A;
    p->B = B;
    p->C = (MATRES *)align_mem(B + N * N);
    p->N = N;
#if CORE_DEBUG
    printmat(A, N, "A");
    printmat(B, N, "B");
#endif
    return N;
}
/* Function: matrix_sum
        Reduce the result matrix to a single 16-bit score.

        The elements are visited in row-major order and added to a running
        total. Whenever the total exceeds clipval the score grows by 10 and
        the total is reset to 0; otherwise the score grows by 1 if the
        current element is greater than the previous one.
*/
ee_s16
matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval)
{
    MATRES running = 0, last = 0, elem = 0;
    ee_s16 score = 0;
    ee_u32 r, c;

    for (r = 0; r < N; r++)
    {
        ee_u32 base = r * N;
        for (c = 0; c < N; c++)
        {
            elem = C[base + c];
            running += elem;
            if (running > clipval)
            {
                score += 10;
                running = 0;
            }
            else if (elem > last)
            {
                score += 1;
            }
            last = elem;
        }
    }
    return score;
}
/* Function: matrix_mul_const
        Scale a matrix: every element of C becomes the matching element of
        A multiplied by val, computed in MATRES precision.
*/
void
matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val)
{
    ee_u32 r, c;

    for (r = 0; r < N; r++)
    {
        ee_u32 base = r * N;
        for (c = 0; c < N; c++)
        {
            ee_u32 at = base + c;
            C[at]     = (MATRES)A[at] * (MATRES)val;
        }
    }
}
/* Function: matrix_add_const
        Add val to every element of the N x N matrix A, in place.
*/
void
matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val)
{
    ee_u32 r, c;

    for (r = 0; r < N; r++)
    {
        ee_u32 base = r * N;
        for (c = 0; c < N; c++)
        {
            A[base + c] += val;
        }
    }
}
/* Function: matrix_mul_vect
        Multiply a matrix by a vector.

        The first N elements of B are used as the vector; C[i] receives the
        dot product of row i of A with that vector. Only the first N
        elements of C are written.
*/
void
matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
{
    ee_u32 r, k;

    for (r = 0; r < N; r++)
    {
        ee_u32 base = r * N;
        C[r]        = 0;
        for (k = 0; k < N; k++)
        {
            C[r] += (MATRES)A[base + k] * (MATRES)B[k];
        }
    }
}
/* Function: matrix_mul_matrix
        Plain matrix product: C = A * B for N x N matrices stored in
        row-major order, accumulated in MATRES precision.
*/
void
matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
{
    ee_u32 r, c, k;

    for (r = 0; r < N; r++)
    {
        ee_u32 base = r * N;
        for (c = 0; c < N; c++)
        {
            MATRES *cell = &C[base + c];
            *cell        = 0;
            for (k = 0; k < N; k++)
            {
                *cell += (MATRES)A[base + k] * (MATRES)B[k * N + c];
            }
        }
    }
}
/* Function: matrix_mul_matrix_bitextract
        Matrix product variant: for every A[i][k] * B[k][j] term, two bit
        fields of the product (selected with bit_extract(.., 2, 4) and
        bit_extract(.., 5, 7)) are multiplied together and that value is
        accumulated into C[i][j] instead of the product itself.
*/
void
matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
{
    ee_u32 r, c, k;

    for (r = 0; r < N; r++)
    {
        ee_u32 base = r * N;
        for (c = 0; c < N; c++)
        {
            MATRES *cell = &C[base + c];
            *cell        = 0;
            for (k = 0; k < N; k++)
            {
                MATRES prod = (MATRES)A[base + k] * (MATRES)B[k * N + c];
                *cell += bit_extract(prod, 2, 4) * bit_extract(prod, 5, 7);
            }
        }
    }
}

View File

@@ -0,0 +1,215 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include <io.h>
#include <stdio.h>
#include "coremark.h"
#include "core_portme.h"
#include <perf.h>
/* Seed values are kept in volatile variables so the compiler cannot treat
   them as compile-time constants. Exactly one of the three run types is
   expected to be enabled; each defines the same three variables with its
   own values. */
#if VALIDATION_RUN
volatile ee_s32 seed1_volatile = 0x3415;
volatile ee_s32 seed2_volatile = 0x3415;
volatile ee_s32 seed3_volatile = 0x66;
#endif
#if PERFORMANCE_RUN
volatile ee_s32 seed1_volatile = 0x0;
volatile ee_s32 seed2_volatile = 0x0;
volatile ee_s32 seed3_volatile = 0x66;
#endif
#if PROFILE_RUN
volatile ee_s32 seed1_volatile = 0x8;
volatile ee_s32 seed2_volatile = 0x8;
volatile ee_s32 seed3_volatile = 0x8;
#endif
/* Seed 4 is initialised from the ITERATIONS macro and seed 5 is 0; main
   reads seed 4 as the iteration count and seed 5 as the algorithm mask
   (presumably through get_seed_32 - its definition is not shown here). */
volatile ee_s32 seed4_volatile = ITERATIONS;
volatile ee_s32 seed5_volatile = 0;
/* Porting : Timing functions
        How time is captured and converted to seconds is platform specific.
        This port samples the cycle counter through rdcycle() (declared
        outside this file, presumably in perf.h) and returns it as a
        CORETIMETYPE tick value.
*/
CORETIMETYPE barebones_clock()
{
    CORETIMETYPE now = (CORETIMETYPE)(rdcycle());

    return now;
}
/* Define : TIMER_RES_DIVIDER
Divider to trade off timer resolution and total time that can be
measured.
Use lower values to increase resolution, but make sure that overflow
does not occur. If there are issues with the return value overflowing,
increase this value.
*/
/* Ticks per second used for every tick-to-time conversion in this file.
   NOTE(review): ticks are cycle-counter values, so this must equal the real
   core clock frequency (10 MHz here) - confirm against the target. */
#define CLOCKS_PER_SEC 10000000
/* Store the current tick count through the pointer _t. */
#define GETMYTIME(_t) (*_t = barebones_clock())
/* Elapsed ticks between two samples. */
#define MYTIMEDIFF(fin, ini) ((fin) - (ini))
#define TIMER_RES_DIVIDER 1
#define SAMPLE_TIME_IMPLEMENTATION 1
#define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER)
/** Define Host specific (POSIX), or target specific global time variables. */
/* Tick samples written by start_time() and stop_time(), read by get_time(). */
static CORETIMETYPE start_time_val, stop_time_val;
/* Function : start_time
        Called right before the timed portion of the benchmark starts.
        Samples the tick counter via barebones_clock() and stores the value
        in start_time_val.
*/
void
start_time(void)
{
    start_time_val = barebones_clock();
}
/* Function : stop_time
        Called right after the timed portion of the benchmark ends.
        Samples the tick counter via barebones_clock() and stores the value
        in stop_time_val.
*/
void
stop_time(void)
{
    stop_time_val = barebones_clock();
}
/* Function : get_time
        Return the number of ticks between the last start_time() and the
        last stop_time() call.

        The unit is whatever barebones_clock() returns; <time_in_secs>
        converts it to seconds.
*/
CORE_TICKS
get_time(void)
{
    return (CORE_TICKS)(stop_time_val - start_time_val);
}
/* Function : time_in_secs
        Convert a tick count as returned by <get_time> to seconds by
        dividing by EE_TICKS_PER_SEC.

        The <secs_ret> type accommodates systems with no floating point
        support. NOTE(review): ticks is converted to secs_ret before the
        division; if secs_ret is narrower than CORE_TICKS, very long runs
        would be truncated - confirm against the secs_ret definition.
*/
secs_ret
time_in_secs(CORE_TICKS ticks)
{
    secs_ret ticks_per_sec = (secs_ret)EE_TICKS_PER_SEC;

    return ((secs_ret)ticks) / ticks_per_sec;
}
/* Number of contexts main loops over when checking and printing results;
   this port uses a single context. */
ee_u32 default_num_contexts = 1;
/* Function : portable_init
        Target specific initialization code.

        Prints a short build banner, then tests for two common porting
        mistakes (ee_ptr_int not pointer sized, ee_u32 not 4 bytes) and
        reports them through ee_printf. Finally marks the port as
        initialized by setting p->portable_id to 1.

        argc and argv are accepted for interface compatibility and are not
        used by this port. The board specific banner lines of the port this
        was derived from (board id, core id, clock, UART and timer setup,
        start delay) are not emitted here.
*/
void
portable_init(core_portable *p, int *argc, char *argv[])
{
    const int ptr_int_ok = (sizeof(ee_ptr_int) == sizeof(ee_u8 *));
    const int u32_ok     = (sizeof(ee_u32) == 4);

    /* banner */
    ee_printf("build: %s for %s\n",BUILD,ARCH);
    ee_printf("\n");
    ee_printf("\n\n");

    /* sanity checks on the configured data types */
    if (!ptr_int_ok)
    {
        ee_printf(
            "ERROR! Please define ee_ptr_int to a type that holds a "
            "pointer!\n");
    }
    if (!u32_ok)
    {
        ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
    }

    p->portable_id = 1;
}
// Print a "fixed point" number: kx is interpreted as value * 1000 and is
// written as <integer part>.<three digit fraction>. Leading zeros of the
// fraction are emitted by hand, so only plain %d is needed from printf.
void printk(uint64_t kx) {
    int whole = (int)(kx / 1000);
    int milli = (int)(kx % 1000);
    int threshold;

    printf("%d.", whole);
    // one "0" for every missing leading digit of the fraction
    for (threshold = 100; threshold >= 10; threshold /= 10) {
        if (milli < threshold) {
            printf("0");
        }
    }
    printf("%d", milli);
}
// Print the port specific result lines for a run that took `ticks` ticks:
//   - the raw tick count (truncated to int for printing),
//   - CoreMark/MHz, derived from the compile-time ITERATIONS constant and
//     CLOCKS_PER_SEC,
//   - CPI, computed from the current rdcycle()/rdinstret() counter values.
// NOTE(review): the score uses ITERATIONS, not the iteration count main
// actually ran - the two only agree when the run uses ITERATIONS as seed 4.
// "n/a" is printed instead of a number when the run is shorter than one
// millisecond or when no instruction has been counted, since either case
// would otherwise divide by zero.
void print_coremarks(uint64_t ticks) {
    const uint64_t MHz = CLOCKS_PER_SEC/1000000;
    uint64_t ksecs;     // elapsed time in milliseconds
    uint64_t kticks2;
    uint64_t instret2;

    printf("*** Ticks : %d\n",(int)ticks);

    ksecs = ticks/(CLOCKS_PER_SEC/1000);
    printf("*** Coremark/MHz : ");
    if (ksecs == 0) {
        printf("n/a");
    } else {
        // widen before multiplying so large ITERATIONS values cannot
        // overflow int arithmetic
        uint64_t kiter_per_sec = ((uint64_t)ITERATIONS*1000*1000)/ksecs;
        printk(kiter_per_sec/MHz);
    }
    printf("\n");

    kticks2 = rdcycle() * (uint64_t)1000;
    instret2 = rdinstret();
    printf("*** CPI (2) : ");
    if (instret2 == 0) {
        printf("n/a");
    } else {
        printk(kticks2/instret2);
    }
    printf("\n");
}
/* Function : portable_fini
        Target specific final code.

        Clears p->portable_id and returns. The LED handling, the "finish"
        time print and the endless blink loop that appear as disabled code
        in the port this was derived from are not performed.
*/
void
portable_fini(core_portable *p)
{
    p->portable_id = 0;
}

View File

@@ -0,0 +1,225 @@
#pragma once
#include <stdint.h>
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
/* Port identity and run length.
   ITERATIONS initialises the iteration seed and is also used by the score
   computation in print_coremarks; BUILD and ARCH are only printed in the
   start-up banner. */
#define ITERATIONS 300
#define BUILD "testbench"
#define ARCH "petituyau"
#include <stddef.h>
/* Topic : Description
This file contains configuration constants required to execute on
different platforms
*/
#ifndef CORE_PORTME_H
#define CORE_PORTME_H
/************************/
/* Data types and settings */
/************************/
/* Each setting below is a default: it only takes effect when the macro has
   not already been defined (for example on the compiler command line). */
/* Configuration : HAS_FLOAT
Define to 1 if the platform supports floating point.
*/
#ifndef HAS_FLOAT
#define HAS_FLOAT 0
#endif
/* Configuration : HAS_TIME_H
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
*/
#ifndef HAS_TIME_H
#define HAS_TIME_H 0
#endif
/* Configuration : USE_CLOCK
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
NOTE(review): this description is identical to HAS_TIME_H; presumably the
flag selects clock() as the time source - confirm.
*/
#ifndef USE_CLOCK
#define USE_CLOCK 0
#endif
/* Configuration : HAS_STDIO
Define to 1 if the platform has stdio.h.
*/
#ifndef HAS_STDIO
#define HAS_STDIO 0
#endif
/* Configuration : HAS_PRINTF
Define to 1 if the platform has stdio.h and implements the printf
function.
*/
#ifndef HAS_PRINTF
#define HAS_PRINTF 0
#endif
/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
Initialize these strings per platform
*/
#ifndef COMPILER_VERSION
#ifdef __GNUC__
#define COMPILER_VERSION "GCC"__VERSION__
#else
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
#endif
#endif
#ifndef COMPILER_FLAGS
#define COMPILER_FLAGS "-O2"
#endif
#ifndef MEM_LOCATION
#define MEM_LOCATION "STACK"
#endif
/* Data Types :
        To avoid compiler issues, define the data types that need to be used
   for 8b, 16b and 32b in <core_portme.h>.

        *Important* :
        ee_ptr_int needs to be the data type used to hold pointers, otherwise
   coremark may fail!!!
        In this port ee_ptr_int is a 32-bit unsigned integer, which is only
   correct on targets with 32-bit pointers (portable_init reports a
   mismatch at run time).
*/
typedef signed short   ee_s16;
typedef unsigned short ee_u16;
typedef signed int     ee_s32;
typedef double         ee_f32;
typedef unsigned char  ee_u8;
typedef unsigned int   ee_u32;
typedef ee_u32         ee_ptr_int;
typedef size_t         ee_size_t;
/* <stddef.h> is included above and already provides NULL; only supply a
   definition when none exists, to avoid redefining a standard macro. */
#ifndef NULL
#define NULL ((void *)0)
#endif
/* align_mem :
        This macro is used to align an offset to point to a 32b value. It is
   used in the Matrix algorithm to initialize the input memory blocks.
        The address is rounded up to the next multiple of 4; an address that
   is already aligned is returned unchanged.
*/
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3))
/* Configuration : CORE_TICKS
Define type of return from the timing functions.
This port uses 64-bit tick values; the 32-bit variant is kept below as
disabled code.
*/
//#define CORETIMETYPE ee_u32
//typedef ee_u32 CORE_TICKS;
#define CORETIMETYPE uint64_t
typedef uint64_t CORE_TICKS;
/* Configuration : SEED_METHOD
Defines method to get seed values that cannot be computed at compile
time.
Valid values :
SEED_ARG - from command line.
SEED_FUNC - from a system function.
SEED_VOLATILE - from volatile variables.
*/
#ifndef SEED_METHOD
#define SEED_METHOD SEED_VOLATILE
#endif
/* Configuration : MEM_METHOD
Defines method to get a block of memory.
Valid values :
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
MEM_STATIC - to use a static memory array.
MEM_STACK - to allocate the data block on the stack (NYI).
NOTE(review): the "(NYI)" remark looks outdated - this port selects
MEM_STACK by default; confirm that main implements it.
*/
#ifndef MEM_METHOD
#define MEM_METHOD MEM_STACK
#endif
/* Configuration : MULTITHREAD
Define for parallel execution
Valid values :
1 - only one context (default).
N>1 - will execute N copies in parallel.
Note :
If this flag is defined to more than 1, an implementation for launching
parallel contexts must be defined.
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK>
to enable them.
It is valid to have a different implementation of <core_start_parallel>
and <core_end_parallel> in <core_portme.c>, to fit a particular architecture.
*/
/* The USE_* helpers are only given defaults (all off) when MULTITHREAD
   itself was not defined beforehand. */
#ifndef MULTITHREAD
#define MULTITHREAD 1
#define USE_PTHREAD 0
#define USE_FORK 0
#define USE_SOCKET 0
#endif
/* Configuration : MAIN_HAS_NOARGC
Needed if platform does not support getting arguments to main.
Valid values :
0 - argc/argv to main is supported
1 - argc/argv to main is not supported
Note :
This flag only matters if MULTITHREAD has been defined to a value
greater than 1.
*/
#ifndef MAIN_HAS_NOARGC
#define MAIN_HAS_NOARGC 1
#endif
/* Configuration : MAIN_HAS_NORETURN
Needed if platform does not support returning a value from main.
Valid values :
0 - main returns an int, and return value will be 0.
1 - platform does not support returning a value from main
*/
#ifndef MAIN_HAS_NORETURN
#define MAIN_HAS_NORETURN 0
#endif
/* Variable : default_num_contexts
Not used for this simple port, must contain the value 1.
*/
extern ee_u32 default_num_contexts;
/* Per-port state handed to portable_init / portable_fini. */
typedef struct CORE_PORTABLE_S
{
ee_u8 portable_id;
} core_portable;
/* target specific init/fini */
void portable_init(core_portable *p, int *argc, char *argv[]);
void portable_fini(core_portable *p);
/* If no run type was chosen explicitly, derive it from TOTAL_DATA_SIZE:
   1200 selects a profile run, 2000 a performance run and any other size a
   validation run. */
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) \
&& !defined(VALIDATION_RUN)
#if (TOTAL_DATA_SIZE == 1200)
#define PROFILE_RUN 1
#elif (TOTAL_DATA_SIZE == 2000)
#define PERFORMANCE_RUN 1
#else
#define VALIDATION_RUN 1
#endif
#endif
/* printf-style output routine used throughout the benchmark. */
int ee_printf(const char *fmt, ...);
/* Port-specific score report for a run that took the given tick count. */
void print_coremarks(uint64_t ticks);
#endif /* CORE_PORTME_H */

View File

@@ -0,0 +1,330 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
/* local functions */
/* State machine step, defined later in this file: called with the address
   of the scan pointer and the per-state transition counters, returns the
   state in which scanning of the token ended. */
enum CORE_STATE core_state_transition(ee_u8 **instr, ee_u32 *transition_count);
/*
Topic: Description
Simple state machines like this one are used in many embedded products.
For more complex state machines, sometimes a state transition table
implementation is used instead, trading speed of direct coding for ease of
maintenance.
Since the main goal of using a state machine in CoreMark is to exercise
the switch/if behaviour, we are using a small Moore machine.
In particular, this machine tests type of string input,
trying to determine whether the input is a number or something else.
(see core_state.png).
*/
/* Function: core_bench_state
        Benchmark function for the state machine.

        The input in memblock is scanned twice with <core_state_transition>:
        once as is, and once after every step-th byte that is not a ','
        has been XORed with seed1. Afterwards the same bytes are XORed with
        seed2, which restores the input when seed1 and seed2 are equal.

        Returns:
        crc updated with the final-state and transition counters of both
        scans.
*/
ee_u16
core_bench_state(ee_u32 blksize,
                 ee_u8 *memblock,
                 ee_s16 seed1,
                 ee_s16 seed2,
                 ee_s16 step,
                 ee_u16 crc)
{
    ee_u32       final_counts[NUM_CORE_STATES];
    ee_u32       track_counts[NUM_CORE_STATES];
    ee_u8 *      cursor;
    ee_u8 *const limit = memblock + blksize;
    ee_u32       idx;

#if CORE_DEBUG
    ee_printf("State Bench: %d,%d,%d,%04x\n", seed1, seed2, step, crc);
#endif
    for (idx = 0; idx < NUM_CORE_STATES; idx++)
    {
        final_counts[idx] = 0;
        track_counts[idx] = 0;
    }

    /* first scan: run the state machine over the unmodified input */
    for (cursor = memblock; *cursor != 0;)
    {
        enum CORE_STATE end_state
            = core_state_transition(&cursor, track_counts);
        final_counts[end_state]++;
#if CORE_DEBUG
        ee_printf("%d,", end_state);
#endif
    }
#if CORE_DEBUG
    ee_printf("\n");
#endif

    /* insert some corruption */
    for (cursor = memblock; cursor < limit; cursor += step)
    {
        if (*cursor != ',')
            *cursor ^= (ee_u8)seed1;
    }

    /* second scan: run the state machine over the corrupted input */
    for (cursor = memblock; *cursor != 0;)
    {
        enum CORE_STATE end_state
            = core_state_transition(&cursor, track_counts);
        final_counts[end_state]++;
#if CORE_DEBUG
        ee_printf("%d,", end_state);
#endif
    }
#if CORE_DEBUG
    ee_printf("\n");
#endif

    /* undo the corruption if seed1 and seed2 are equal */
    for (cursor = memblock; cursor < limit; cursor += step)
    {
        if (*cursor != ',')
            *cursor ^= (ee_u8)seed2;
    }

    /* end timing */
    for (idx = 0; idx < NUM_CORE_STATES; idx++)
    {
        crc = crcu32(final_counts[idx], crc);
        crc = crcu32(track_counts[idx], crc);
    }
    return crc;
}
/* Default initialization patterns.
   Four alternatives per category; core_init_state picks the category and
   the alternative from the seed. Integer patterns are 4 characters long,
   all other patterns 8 characters. */
static ee_u8 *intpat[4] = {
    (ee_u8 *)"5012",
    (ee_u8 *)"1234",
    (ee_u8 *)"-874",
    (ee_u8 *)"+122"
};
static ee_u8 *floatpat[4] = {
    (ee_u8 *)"35.54400",
    (ee_u8 *)".1234500",
    (ee_u8 *)"-110.700",
    (ee_u8 *)"+0.64400"
};
static ee_u8 *scipat[4] = {
    (ee_u8 *)"5.500e+3",
    (ee_u8 *)"-.123e-2",
    (ee_u8 *)"-87e+832",
    (ee_u8 *)"+0.6e-12"
};
static ee_u8 *errpat[4] = {
    (ee_u8 *)"T0.3e-1F",
    (ee_u8 *)"-T.T++Tq",
    (ee_u8 *)"1T3.4e4z",
    (ee_u8 *)"34.0e-T^"
};
/* Function: core_init_state
        Fill the state machine input buffer.

        The buffer receives a sequence of predetermined patterns, each one
        followed by a ',' separator. Which pattern comes next is derived from
        the seed, which is incremented before every selection. Whatever space
        is left once no further pattern fits is filled with zero bytes, so the
        buffer always ends with a terminator.

        Parameters:
        size - number of bytes available at p.
        seed - starting value for pattern selection.
        p    - destination buffer.

        Note:
        The seed parameter MUST be supplied from a source that cannot be
        determined at compile time
*/
void
core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p)
{
    ee_u32 filled  = 0; /* bytes of p written so far */
    ee_u32 pending = 0; /* length of the pattern chosen but not yet copied */
    ee_u32 k;
    ee_u8 *pattern = 0;
#if CORE_DEBUG
    ee_u8 *start = p;
    ee_printf("State: %d,%d\n", size, seed);
#endif
    /* Keep the last byte out of reach of the pattern loop. */
    size--;
    while ((filled + pending + 1) < size)
    {
        int kind;
        int slot;

        /* Emit the pattern picked on the previous pass, then a separator. */
        if (pending > 0)
        {
            ee_u8 *dst = p + filled;
            for (k = 0; k < pending; k++)
                dst[k] = pattern[k];
            dst[pending] = ',';
            filled += pending + 1;
        }

        /* Pick the next pattern: low 3 bits choose the category, the next
           2 bits choose the entry within that category. */
        seed++;
        kind = seed & 0x7;
        slot = (seed >> 3) & 0x3;
        if (kind <= 2) /* int */
        {
            pattern = intpat[slot];
            pending = 4;
        }
        else if (kind <= 4) /* float */
        {
            pattern = floatpat[slot];
            pending = 8;
        }
        else if (kind <= 6) /* scientific */
        {
            pattern = scipat[slot];
            pending = 8;
        }
        else /* invalid */
        {
            pattern = errpat[slot];
            pending = 8;
        }
    }
    size++;
    /* fill the rest with 0 */
    for (; filled < size; filled++)
        p[filled] = 0;
#if CORE_DEBUG
    ee_printf("State Input: %s\n", start);
#endif
}
/* Return 1 when c is an ASCII decimal digit ('0'..'9'), 0 otherwise.
   The two comparisons are combined with bitwise '&', so both are always
   evaluated. */
static ee_u8
ee_isdigit(ee_u8 c)
{
    return (ee_u8)((c >= '0') & (c <= '9'));
}
/* Function: core_state_transition
Actual state machine.
The state machine will continue scanning until either:
1 - an invalid input is detected.
2 - a valid number has been detected.
The input pointer is updated to point to the end of the token, and the
end state is returned (either specific format determined or invalid).

Parameters:
instr - in/out cursor into the input. On return it points just past the
',' that ended the token, just past the symbol that made the token
invalid, or at the terminating 0 byte.
transition_count - array indexed by enum CORE_STATE; entries are
incremented as transitions are taken (see the per-state notes below).

Returns:
The state the machine was in when scanning stopped.
*/
enum CORE_STATE
core_state_transition(ee_u8 **instr, ee_u32 *transition_count)
{
ee_u8 * str = *instr;
ee_u8 NEXT_SYMBOL;
enum CORE_STATE state = CORE_START;
/* The loop's str++ also runs after the symbol that switched the state to
CORE_INVALID, so the cursor ends up one past the offending symbol. */
for (; *str && state != CORE_INVALID; str++)
{
NEXT_SYMBOL = *str;
if (NEXT_SYMBOL == ',') /* end of this input */
{
/* Step over the separator so the next call starts on a new token. */
str++;
break;
}
switch (state)
{
case CORE_START:
/* First symbol of a token: digit, sign, '.' or anything else. */
if (ee_isdigit(NEXT_SYMBOL))
{
state = CORE_INT;
}
else if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
{
state = CORE_S1;
}
else if (NEXT_SYMBOL == '.')
{
state = CORE_FLOAT;
}
else
{
state = CORE_INVALID;
transition_count[CORE_INVALID]++;
}
/* Counted for every first symbol, valid or not. */
transition_count[CORE_START]++;
break;
case CORE_S1:
/* A leading sign was seen; every outcome leaves this state and is
counted against CORE_S1. */
if (ee_isdigit(NEXT_SYMBOL))
{
state = CORE_INT;
transition_count[CORE_S1]++;
}
else if (NEXT_SYMBOL == '.')
{
state = CORE_FLOAT;
transition_count[CORE_S1]++;
}
else
{
state = CORE_INVALID;
transition_count[CORE_S1]++;
}
break;
case CORE_INT:
/* Further digits keep the state and are not counted. */
if (NEXT_SYMBOL == '.')
{
state = CORE_FLOAT;
transition_count[CORE_INT]++;
}
else if (!ee_isdigit(NEXT_SYMBOL))
{
state = CORE_INVALID;
transition_count[CORE_INT]++;
}
break;
case CORE_FLOAT:
/* Fraction digits keep the state; 'e'/'E' starts an exponent. */
if (NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e')
{
state = CORE_S2;
transition_count[CORE_FLOAT]++;
}
else if (!ee_isdigit(NEXT_SYMBOL))
{
state = CORE_INVALID;
transition_count[CORE_FLOAT]++;
}
break;
case CORE_S2:
/* The exponent marker must be followed by an explicit sign. */
if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
{
state = CORE_EXPONENT;
transition_count[CORE_S2]++;
}
else
{
state = CORE_INVALID;
transition_count[CORE_S2]++;
}
break;
case CORE_EXPONENT:
/* The exponent sign must be followed by at least one digit. */
if (ee_isdigit(NEXT_SYMBOL))
{
state = CORE_SCIENTIFIC;
transition_count[CORE_EXPONENT]++;
}
else
{
state = CORE_INVALID;
transition_count[CORE_EXPONENT]++;
}
break;
case CORE_SCIENTIFIC:
/* Unlike the other states, leaving this one is counted against
CORE_INVALID rather than against the state being left. */
if (!ee_isdigit(NEXT_SYMBOL))
{
state = CORE_INVALID;
transition_count[CORE_INVALID]++;
}
break;
default:
break;
}
}
*instr = str;
return state;
}

View File

@@ -0,0 +1,249 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
/* Function: get_seed
Get a value that cannot be determined at compile time.
Since different embedded systems and compilers are used, 3 different
methods are provided: 1 - Using a volatile variable. This method is only
valid if the compiler is forced to generate code that reads the value of a
volatile variable from memory at run time. Please note, if using this method,
you would need to modify core_portme.c to generate training profile. 2 -
Command line arguments. This is the preferred method if command line
arguments are supported. 3 - System function. If none of the first 2 methods
is available on the platform, a system function which is not a stub can be
used.
e.g. read the value on GPIO pins connected to switches, or invoke
special simulator functions.
*/
#if (SEED_METHOD == SEED_VOLATILE)
extern volatile ee_s32 seed1_volatile;
extern volatile ee_s32 seed2_volatile;
extern volatile ee_s32 seed3_volatile;
extern volatile ee_s32 seed4_volatile;
extern volatile ee_s32 seed5_volatile;
/* Return seed number i (1..5) by reading the matching volatile variable;
   any other index yields 0. Only the selected variable is read. */
ee_s32
get_seed_32(int i)
{
    switch (i)
    {
        case 1:
            return seed1_volatile;
        case 2:
            return seed2_volatile;
        case 3:
            return seed3_volatile;
        case 4:
            return seed4_volatile;
        case 5:
            return seed5_volatile;
        default:
            return 0;
    }
}
#elif (SEED_METHOD == SEED_ARG)
/* Convert a command line value to an integer.
   Accepted form: optional '-', optional "0x" prefix (lower case x, digits
   0-9 and a-f), otherwise decimal digits, then an optional 'K' (times 1024)
   or 'M' (times 1024 * 1024) suffix. Scanning stops at the first character
   that is not a digit of the active radix. */
ee_s32
parseval(char *valstring)
{
    const char *cur      = valstring;
    ee_s32      value    = 0;
    ee_s32      radix    = 10;
    int         negative = 0;

    if (*cur == '-')
    {
        negative = 1;
        cur++;
    }
    if ((cur[0] == '0') && (cur[1] == 'x'))
    {
        radix = 16;
        cur += 2;
    }

    /* first look for digits */
    for (;; cur++)
    {
        ee_s32 digit;
        if ((*cur >= '0') && (*cur <= '9'))
            digit = *cur - '0';
        else if ((radix == 16) && (*cur >= 'a') && (*cur <= 'f'))
            digit = 10 + *cur - 'a';
        else
            break;
        value = value * radix + digit;
    }

    /* now add qualifiers */
    switch (*cur)
    {
        case 'K':
            value *= 1024;
            break;
        case 'M':
            value *= 1024 * 1024;
            break;
        default:
            break;
    }

    return negative ? -value : value;
}
/* Return command line argument i parsed with parseval(), or 0 when fewer
   than i + 1 arguments were supplied. */
ee_s32
get_seed_args(int i, int argc, char *argv[])
{
    return (argc > i) ? parseval(argv[i]) : 0;
}
#elif (SEED_METHOD == SEED_FUNC)
/* If using OS based function, you must define and implement the functions below
* in core_portme.h and core_portme.c ! */
/* Return seed number i (1..5) by calling the matching portme_sysN() hook;
   any other index yields 0. Only the selected hook is called. */
ee_s32
get_seed_32(int i)
{
    switch (i)
    {
        case 1:
            return portme_sys1();
        case 2:
            return portme_sys2();
        case 3:
            return portme_sys3();
        case 4:
            return portme_sys4();
        case 5:
            return portme_sys5();
        default:
            return 0;
    }
}
#endif
/* Function: crc*
        Service functions to calculate 16b CRC code.

        crcu8 folds one byte into the running CRC, least significant bit
        first. For every bit, the feedback is the XOR of the data bit and the
        CRC's low bit; when it is set, 0x4002 is XORed in before the shift and
        bit 15 is set after it.
*/
ee_u16
crcu8(ee_u8 data, ee_u16 crc)
{
    ee_u8 bit;

    for (bit = 0; bit < 8; bit++)
    {
        ee_u8 feedback = (ee_u8)((data ^ (ee_u8)crc) & 1);

        data >>= 1;
        if (feedback)
            crc ^= 0x4002;
        crc >>= 1;
        if (feedback)
            crc |= 0x8000;
        else
            crc &= 0x7fff;
    }
    return crc;
}
/* Fold a 16-bit value into the CRC: low byte first, then high byte. */
ee_u16
crcu16(ee_u16 newval, ee_u16 crc)
{
    ee_u8 low  = (ee_u8)(newval);
    ee_u8 high = (ee_u8)((newval) >> 8);

    return crcu8(high, crcu8(low, crc));
}
/* Fold a 32-bit value into the CRC: low 16 bits first, then high 16 bits. */
ee_u16
crcu32(ee_u32 newval, ee_u16 crc)
{
    ee_s16 low  = (ee_s16)newval;
    ee_s16 high = (ee_s16)(newval >> 16);

    return crc16(high, crc16(low, crc));
}
/* Signed front end for crcu16(): the value is converted to ee_u16 and
   folded into the CRC. */
ee_u16
crc16(ee_s16 newval, ee_u16 crc)
{
    ee_u16 bits = (ee_u16)newval;

    return crcu16(bits, crc);
}
/* Verify that the port's fixed-width typedefs have the expected sizes.
   Prints one error line per mismatching type, plus a final hint when
   anything failed.

   Returns the number of mismatching types (0 when all sizes are correct).

   The parameter list is spelled (void) so the definition is a real prototype
   and matches the declaration in coremark.h. */
ee_u8
check_data_types(void)
{
    ee_u8 retval = 0;
    if (sizeof(ee_u8) != 1)
    {
        ee_printf("ERROR: ee_u8 is not an 8b datatype!\n");
        retval++;
    }
    if (sizeof(ee_u16) != 2)
    {
        ee_printf("ERROR: ee_u16 is not a 16b datatype!\n");
        retval++;
    }
    if (sizeof(ee_s16) != 2)
    {
        ee_printf("ERROR: ee_s16 is not a 16b datatype!\n");
        retval++;
    }
    if (sizeof(ee_s32) != 4)
    {
        ee_printf("ERROR: ee_s32 is not a 32b datatype!\n");
        retval++;
    }
    if (sizeof(ee_u32) != 4)
    {
        ee_printf("ERROR: ee_u32 is not a 32b datatype!\n");
        retval++;
    }
    /* ee_ptr_int must be able to hold a pointer value. */
    if (sizeof(ee_ptr_int) != sizeof(int *))
    {
        ee_printf(
            "ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n");
        retval++;
    }
    if (retval > 0)
    {
        ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n");
    }
    return retval;
}

View File

@@ -0,0 +1,184 @@
#pragma once
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
/* Topic: Description
This file contains declarations of the various benchmark functions.
*/
/* Configuration: TOTAL_DATA_SIZE
        Define total size for data algorithms will operate on.
        The default is parenthesized so the macro expands as a single value
        inside larger expressions (e.g. x / TOTAL_DATA_SIZE).
*/
#ifndef TOTAL_DATA_SIZE
#define TOTAL_DATA_SIZE (2 * 1000)
#endif
#define SEED_ARG 0
#define SEED_FUNC 1
#define SEED_VOLATILE 2
#define MEM_STATIC 0
#define MEM_MALLOC 1
#define MEM_STACK 2
#include "core_portme.h"
#if HAS_STDIO
#include <stdio.h>
#endif
#if HAS_PRINTF
#define ee_printf printf
#endif
/* Actual benchmark execution in iterate */
void *iterate(void *pres);
/* Typedef: secs_ret
For machines that have floating point support, get number of seconds as
a double. Otherwise an unsigned int.
*/
#if HAS_FLOAT
typedef double secs_ret;
#else
typedef ee_u32 secs_ret;
#endif
#if MAIN_HAS_NORETURN
#define MAIN_RETURN_VAL
#define MAIN_RETURN_TYPE void
#else
#define MAIN_RETURN_VAL 0
#define MAIN_RETURN_TYPE int
#endif
void start_time(void);
void stop_time(void);
CORE_TICKS get_time(void);
secs_ret time_in_secs(CORE_TICKS ticks);
/* Misc useful functions */
ee_u16 crcu8(ee_u8 data, ee_u16 crc);
ee_u16 crc16(ee_s16 newval, ee_u16 crc);
ee_u16 crcu16(ee_u16 newval, ee_u16 crc);
ee_u16 crcu32(ee_u32 newval, ee_u16 crc);
ee_u8 check_data_types(void);
void * portable_malloc(ee_size_t size);
void portable_free(void *p);
ee_s32 parseval(char *valstring);
/* Algorithm IDS */
#define ID_LIST (1 << 0)
#define ID_MATRIX (1 << 1)
#define ID_STATE (1 << 2)
#define ALL_ALGORITHMS_MASK (ID_LIST | ID_MATRIX | ID_STATE)
#define NUM_ALGORITHMS 3
/* list data structures */
/* Payload attached to a list node: two signed 16-bit fields.
NOTE(review): presumably data16 is the value being sorted/searched and idx
the element's index - confirm against core_list_join.c. */
typedef struct list_data_s
{
ee_s16 data16;
ee_s16 idx;
} list_data;
/* Singly linked list node: pointer to the next node plus a pointer to the
node's payload. */
typedef struct list_head_s
{
struct list_head_s *next;
struct list_data_s *info;
} list_head;
/*matrix benchmark related stuff */
#define MATDAT_INT 1
#if MATDAT_INT
typedef ee_s16 MATDAT;
typedef ee_s32 MATRES;
#else
typedef ee_f16 MATDAT;
typedef ee_f32 MATRES;
#endif
/* Parameter bundle handed to the matrix benchmark functions
(core_init_matrix / core_bench_matrix).
NOTE(review): presumably N is the matrix dimension, A and B the input
matrices and C the result matrix - confirm against core_matrix.c. */
typedef struct MAT_PARAMS_S
{
int N;
MATDAT *A;
MATDAT *B;
MATRES *C;
} mat_params;
/* state machine related stuff */
/* List of all the possible states for the FSM */
/* The values double as indices into the per-state counter arrays, which are
sized with NUM_CORE_STATES. */
typedef enum CORE_STATE
{
CORE_START = 0, /* initial state, before the first symbol of a token */
CORE_INVALID, /* a symbol not allowed in the current state was seen */
CORE_S1, /* a leading '+' or '-' was seen */
CORE_S2, /* an 'e' or 'E' exponent marker was seen */
CORE_INT, /* scanning integer digits */
CORE_FLOAT, /* a '.' was seen; scanning fraction digits */
CORE_EXPONENT, /* the sign after the exponent marker was seen */
CORE_SCIENTIFIC, /* scanning exponent digits */
NUM_CORE_STATES /* number of states; not a state itself */
} core_state_e;
/* Helper structure to hold results */
/* Groups the inputs and outputs of a benchmark run. */
typedef struct RESULTS_S
{
/* inputs */
ee_s16 seed1; /* Initializing seed */
ee_s16 seed2; /* Initializing seed */
ee_s16 seed3; /* Initializing seed */
void * memblock[4]; /* Pointer to safe memory location */
ee_u32 size; /* Size of the data */
ee_u32 iterations; /* Number of iterations to execute */
ee_u32 execs; /* Bitmask of operations to execute */
struct list_head_s *list;
mat_params mat;
/* outputs */
ee_u16 crc;
ee_u16 crclist;
ee_u16 crcmatrix;
ee_u16 crcstate;
ee_s16 err;
/* Multithread specific */
core_portable port;
} core_results;
/* Multicore execution handling */
#if (MULTITHREAD > 1)
ee_u8 core_start_parallel(core_results *res);
ee_u8 core_stop_parallel(core_results *res);
#endif
/* list benchmark functions */
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed);
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx);
/* state benchmark functions */
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p);
ee_u16 core_bench_state(ee_u32 blksize,
ee_u8 *memblock,
ee_s16 seed1,
ee_s16 seed2,
ee_s16 step,
ee_u16 crc);
/* matrix benchmark functions */
ee_u32 core_init_matrix(ee_u32 blksize,
void * memblk,
ee_s32 seed,
mat_params *p);
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc);

View File

@@ -0,0 +1,712 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <io.h>
#include "coremark.h"
#include <stdarg.h>
#define ZEROPAD (1 << 0) /* Pad with zero */
#define SIGN (1 << 1) /* Unsigned/signed long */
#define PLUS (1 << 2) /* Show plus */
#define SPACE (1 << 3) /* Spacer */
#define LEFT (1 << 4) /* Left justified */
#define HEX_PREP (1 << 5) /* 0x */
#define UPPERCASE (1 << 6) /* 'ABCDEF' */
#define is_digit(c) ((c) >= '0' && (c) <= '9')
static char * digits = "0123456789abcdefghijklmnopqrstuvwxyz";
static char * upper_digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
static ee_size_t strnlen(const char *s, ee_size_t count);
/* Length of s, looking at no more than count characters: returns the number
   of characters before the first '\0', or count if none is found earlier. */
static ee_size_t
strnlen(const char *s, ee_size_t count)
{
    ee_size_t len = 0;

    while (s[len] != '\0' && count != 0)
    {
        count--;
        len++;
    }
    return len;
}
/* Parse the run of decimal digits at *s and return its value.
   *s is advanced past the digits that were consumed. */
static int
skip_atoi(const char **s)
{
    const char *cur   = *s;
    int         value = 0;

    while (is_digit(*cur))
    {
        value = value * 10 + *cur - '0';
        cur++;
    }
    *s = cur;
    return value;
}
/* Format an integer into the output buffer.

   str       - write position in the output buffer.
   num       - value to print; treated as signed only when SIGN is set in
               type, otherwise its bit pattern is printed as unsigned.
   base      - radix, 2..36.
   size      - minimum field width (values <= 0 mean no padding).
   precision - minimum number of digits; shorter values get leading zeros.
   type      - bit mask of ZEROPAD, SIGN, PLUS, SPACE, LEFT, HEX_PREP and
               UPPERCASE.

   Returns the write position just past the last character produced, or 0
   when base is out of range.

   The magnitude is computed in unsigned arithmetic, so the most negative
   long is handled without negating a signed value (which would overflow). */
static char *
number(char *str, long num, int base, int size, int precision, int type)
{
    char          pad, sign, tmp[66];
    char *        dig = digits;
    unsigned long mag = (unsigned long)num;
    int           i;

    if (type & UPPERCASE)
        dig = upper_digits;
    /* Left alignment and zero padding are mutually exclusive. */
    if (type & LEFT)
        type &= ~ZEROPAD;
    if (base < 2 || base > 36)
        return 0;

    pad  = (type & ZEROPAD) ? '0' : ' ';
    sign = 0;
    if (type & SIGN)
    {
        if (num < 0)
        {
            sign = '-';
            mag  = 0UL - mag; /* two's complement magnitude, no overflow */
            size--;
        }
        else if (type & PLUS)
        {
            sign = '+';
            size--;
        }
        else if (type & SPACE)
        {
            sign = ' ';
            size--;
        }
    }

    /* Reserve room for the "0x" / "0" radix prefix. */
    if (type & HEX_PREP)
    {
        if (base == 16)
            size -= 2;
        else if (base == 8)
            size--;
    }

    /* Produce the digits least significant first. */
    i = 0;
    if (mag == 0)
        tmp[i++] = '0';
    else
    {
        while (mag != 0)
        {
            tmp[i++] = dig[mag % (unsigned)base];
            mag      = mag / (unsigned)base;
        }
    }

    if (i > precision)
        precision = i;
    size -= precision;

    /* Right aligned, space padded: padding goes before the sign. */
    if (!(type & (ZEROPAD | LEFT)))
        while (size-- > 0)
            *str++ = ' ';
    if (sign)
        *str++ = sign;
    if (type & HEX_PREP)
    {
        if (base == 8)
            *str++ = '0';
        else if (base == 16)
        {
            *str++ = '0';
            *str++ = digits[33]; /* 'x' */
        }
    }
    /* Right aligned, zero padded: padding goes after sign and prefix. */
    if (!(type & LEFT))
        while (size-- > 0)
            *str++ = pad;
    while (i < precision--)
        *str++ = '0';
    while (i-- > 0)
        *str++ = tmp[i];
    /* Left aligned: padding goes after the digits. */
    while (size-- > 0)
        *str++ = ' ';
    return str;
}
/* Format six bytes at addr as colon-separated two-digit hex (for example a
   MAC address), padded with spaces to at least size characters.
   type: UPPERCASE selects upper case hex digits, LEFT selects left
   alignment. precision is accepted but not used.
   Returns the write position just past the last character produced. */
static char *
eaddr(char *str, unsigned char *addr, int size, int precision, int type)
{
    char  text[24];
    char *dig = (type & UPPERCASE) ? upper_digits : digits;
    int   octet, k;
    int   len = 0;

    for (octet = 0; octet < 6; octet++)
    {
        unsigned char value = addr[octet];

        if (octet > 0)
            text[len++] = ':';
        text[len++] = dig[value >> 4];
        text[len++] = dig[value & 0x0F];
    }

    if (!(type & LEFT))
        while (len < size--)
            *str++ = ' ';
    for (k = 0; k < len; ++k)
        *str++ = text[k];
    while (len < size--)
        *str++ = ' ';
    return str;
}
/* Format four bytes at addr as dot-separated decimal numbers without
   leading zeros (for example an IPv4 address), padded with spaces to at
   least size characters.
   type: LEFT selects left alignment. precision is accepted but not used.
   Returns the write position just past the last character produced. */
static char *
iaddr(char *str, unsigned char *addr, int size, int precision, int type)
{
    char text[24];
    int  octet, value, k;
    int  len = 0;

    for (octet = 0; octet < 4; octet++)
    {
        if (octet > 0)
            text[len++] = '.';

        value = addr[octet];
        if (value >= 100)
        {
            text[len++] = digits[value / 100];
            value %= 100;
            text[len++] = digits[value / 10];
            value %= 10;
        }
        else if (value >= 10)
        {
            text[len++] = digits[value / 10];
            value %= 10;
        }
        /* Units digit; also covers the value 0. */
        text[len++] = digits[value];
    }

    if (!(type & LEFT))
        while (len < size--)
            *str++ = ' ';
    for (k = 0; k < len; ++k)
        *str++ = text[k];
    while (len < size--)
        *str++ = ' ';
    return str;
}
#if HAS_FLOAT
char * ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf);
char * fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf);
static void ee_bufcpy(char *d, char *s, int count);
/* Copy count bytes from ps to pd, lowest address first. No terminator is
   appended. */
void
ee_bufcpy(char *pd, char *ps, int count)
{
    int k;

    for (k = 0; k != count; k++)
        pd[k] = ps[k];
}
/* Convert value to text in buffer using format fmt ('e', 'E', 'f', 'g' or
'G') and the given precision; the result is zero-terminated.
The digit strings come from ecvtbuf()/fcvtbuf(), which are declared above
and defined elsewhere.
NOTE(review): presumably they behave like the classic ecvt()/fcvt()
(digit string returned, decimal point position in *decpt, sign flag in
*sign) - confirm against their implementation.
No bound is applied to what is written to buffer; the caller must provide
enough room. */
static void
parse_float(double value, char *buffer, char fmt, int precision)
{
int decpt, sign, exp, pos;
/* This local shadows the file-scope digits table inside this function. */
char *digits = NULL;
char cvtbuf[80];
int capexp = 0;
int magnitude;
/* Upper case formats: remember the case for the exponent letter and carry
on with the lower case format letter. */
if (fmt == 'G' || fmt == 'E')
{
capexp = 1;
fmt += 'a' - 'A';
}
/* 'g' picks exponent or fixed notation from the decimal magnitude. */
if (fmt == 'g')
{
digits = ecvtbuf(value, precision, &decpt, &sign, cvtbuf);
magnitude = decpt - 1;
if (magnitude < -4 || magnitude > precision - 1)
{
fmt = 'e';
precision -= 1;
}
else
{
fmt = 'f';
precision -= decpt;
}
}
if (fmt == 'e')
{
/* One digit before the point plus precision digits after it. */
digits = ecvtbuf(value, precision + 1, &decpt, &sign, cvtbuf);
if (sign)
*buffer++ = '-';
*buffer++ = *digits;
if (precision > 0)
*buffer++ = '.';
ee_bufcpy(buffer, digits + 1, precision);
buffer += precision;
*buffer++ = capexp ? 'E' : 'e';
if (decpt == 0)
{
if (value == 0.0)
exp = 0;
else
exp = -1;
}
else
exp = decpt - 1;
if (exp < 0)
{
*buffer++ = '-';
exp = -exp;
}
else
*buffer++ = '+';
/* The exponent is always written as exactly three digits. */
buffer[2] = (exp % 10) + '0';
exp = exp / 10;
buffer[1] = (exp % 10) + '0';
exp = exp / 10;
buffer[0] = (exp % 10) + '0';
buffer += 3;
}
else if (fmt == 'f')
{
digits = fcvtbuf(value, precision, &decpt, &sign, cvtbuf);
if (sign)
*buffer++ = '-';
if (*digits)
{
if (decpt <= 0)
{
/* Value below 1: "0." followed by -decpt zeros, then the digits. */
*buffer++ = '0';
*buffer++ = '.';
for (pos = 0; pos < -decpt; pos++)
*buffer++ = '0';
while (*digits)
*buffer++ = *digits++;
}
else
{
/* Insert the point after the first decpt digits. */
pos = 0;
while (*digits)
{
if (pos++ == decpt)
*buffer++ = '.';
*buffer++ = *digits++;
}
}
}
else
{
/* Empty digit string: print 0 with precision zeros after the point. */
*buffer++ = '0';
if (precision > 0)
{
*buffer++ = '.';
for (pos = 0; pos < precision; pos++)
*buffer++ = '0';
}
}
}
*buffer = '\0';
}
/* Make sure the number text in buffer contains a decimal point.
   - If a '.' is already present, the text is left unchanged.
   - If an exponent marker ('e' or 'E') is found first, a '.' is inserted
     directly in front of it.
   - Otherwise a '.' is appended.
   The text grows by one character, so buffer needs one spare byte. */
static void
decimal_point(char *buffer)
{
    while (*buffer)
    {
        if (*buffer == '.')
            return;
        if (*buffer == 'e' || *buffer == 'E')
            break;
        buffer++;
    }

    if (*buffer)
    {
        /* Shift the exponent part, terminator included, one place to the
           right. The loop has to reach index 0 so the exponent marker is
           moved as well before '.' takes its place. */
        int n;
        for (n = strnlen(buffer, 256); n >= 0; n--)
            buffer[n + 1] = buffer[n];
        *buffer = '.';
    }
    else
    {
        *buffer++ = '.';
        *buffer   = '\0';
    }
}
/* Remove trailing zeros from the fraction of the number text in buffer.
   A decimal point left without fraction digits is removed too. When the
   text has an exponent part ('e' or 'E' onwards), that part is kept and
   moved down to follow the shortened fraction. Text without a '.' is left
   unchanged. */
static void
cropzeros(char *buffer)
{
    char *stop;

    while (*buffer && *buffer != '.')
        buffer++;
    if (*buffer++)
    {
        /* Find the end of the fraction: exponent marker or end of text. */
        while (*buffer && *buffer != 'e' && *buffer != 'E')
            buffer++;
        stop = buffer--;
        while (*buffer == '0')
            buffer--;
        if (*buffer == '.')
            buffer--;
        /* Copy the tail (exponent part and terminator) down over the
           removed characters. */
        while ((*++buffer = *stop++) != '\0')
            ;
    }
}
/* Format a floating point number into the output buffer.
str - write position in the output buffer.
num - value to print.
size - minimum field width.
precision - digits after the decimal point; negative selects the default 6.
fmt - conversion letter passed on to parse_float().
flags - bit mask of ZEROPAD, SIGN, PLUS, SPACE, LEFT and HEX_PREP.
Returns the write position just past the last character produced.
NOTE(review): the text is built in the 80-byte tmp buffer and parse_float()
applies no length limit, so values with many digits look able to overrun
it - confirm the range of values callers pass. */
static char *
flt(char *str, double num, int size, int precision, char fmt, int flags)
{
char tmp[80];
char c, sign;
int n, i;
// Left align means no zero padding
if (flags & LEFT)
flags &= ~ZEROPAD;
// Determine padding and sign char
c = (flags & ZEROPAD) ? '0' : ' ';
sign = 0;
if (flags & SIGN)
{
if (num < 0.0)
{
sign = '-';
num = -num;
size--;
}
else if (flags & PLUS)
{
sign = '+';
size--;
}
else if (flags & SPACE)
{
sign = ' ';
size--;
}
}
// Compute the precision value
if (precision < 0)
precision = 6; // Default precision: 6
// Convert floating point number to text
parse_float(num, tmp, fmt, precision);
// '#' flag with zero precision: force a decimal point into the text
if ((flags & HEX_PREP) && precision == 0)
decimal_point(tmp);
// 'g' without the '#' flag: drop trailing zeros
if (fmt == 'g' && !(flags & HEX_PREP))
cropzeros(tmp);
n = strnlen(tmp, 256);
// Output number with alignment and padding
size -= n;
// Right aligned, space padded: padding goes before the sign
if (!(flags & (ZEROPAD | LEFT)))
while (size-- > 0)
*str++ = ' ';
if (sign)
*str++ = sign;
// Right aligned, zero padded: padding goes after the sign
if (!(flags & LEFT))
while (size-- > 0)
*str++ = c;
for (i = 0; i < n; i++)
*str++ = tmp[i];
// Left aligned: padding goes after the text
while (size-- > 0)
*str++ = ' ';
return str;
}
#endif
/* Minimal vsprintf: expand fmt with the arguments in args into buf.
Supported conversions: c, s, p, a/A (address formats, see iaddr/eaddr),
o, x/X, d/i, u, and f when HAS_FLOAT is set. Flags '-', '+', ' ', '#'
and '0', a field width and a precision (both also as '*') are parsed.
Returns the number of characters written, not counting the terminating
'\0'.
No bound is applied to what is written to buf; the caller must provide a
buffer large enough for the expanded text. */
static int
ee_vsprintf(char *buf, const char *fmt, va_list args)
{
int len;
unsigned long num;
int i, base;
char * str;
char * s;
int flags; // Flags to number()
int field_width; // Width of output field
int precision; // Min. # of digits for integers; max number of chars for
// from string
int qualifier; // 'l' or 'L' for integer fields ('h' is not parsed below)
for (str = buf; *fmt; fmt++)
{
// Ordinary characters are copied through unchanged
if (*fmt != '%')
{
*str++ = *fmt;
continue;
}
// Process flags
flags = 0;
repeat:
fmt++; // This also skips first '%'
switch (*fmt)
{
case '-':
flags |= LEFT;
goto repeat;
case '+':
flags |= PLUS;
goto repeat;
case ' ':
flags |= SPACE;
goto repeat;
case '#':
flags |= HEX_PREP;
goto repeat;
case '0':
flags |= ZEROPAD;
goto repeat;
}
// Get field width
field_width = -1;
if (is_digit(*fmt))
field_width = skip_atoi(&fmt);
else if (*fmt == '*')
{
fmt++;
field_width = va_arg(args, int);
// A negative '*' width means left alignment
if (field_width < 0)
{
field_width = -field_width;
flags |= LEFT;
}
}
// Get the precision
precision = -1;
if (*fmt == '.')
{
++fmt;
if (is_digit(*fmt))
precision = skip_atoi(&fmt);
else if (*fmt == '*')
{
++fmt;
precision = va_arg(args, int);
}
if (precision < 0)
precision = 0;
}
// Get the conversion qualifier
qualifier = -1;
if (*fmt == 'l' || *fmt == 'L')
{
qualifier = *fmt;
fmt++;
}
// Default base
base = 10;
switch (*fmt)
{
case 'c':
if (!(flags & LEFT))
while (--field_width > 0)
*str++ = ' ';
*str++ = (unsigned char)va_arg(args, int);
while (--field_width > 0)
*str++ = ' ';
continue;
case 's':
s = va_arg(args, char *);
if (!s)
s = "<NULL>";
// Without an explicit precision, -1 is passed as the length limit.
// NOTE(review): presumably ee_size_t is unsigned, making that limit
// effectively unbounded - confirm in core_portme.h.
len = strnlen(s, precision);
if (!(flags & LEFT))
while (len < field_width--)
*str++ = ' ';
for (i = 0; i < len; ++i)
*str++ = *s++;
while (len < field_width--)
*str++ = ' ';
continue;
case 'p':
// Default pointer format: zero padded, two hex digits per byte
if (field_width == -1)
{
field_width = 2 * sizeof(void *);
flags |= ZEROPAD;
}
str = number(str,
(unsigned long)va_arg(args, void *),
16,
field_width,
precision,
flags);
continue;
case 'A':
flags |= UPPERCASE;
// fall through
case 'a':
// "%la" prints six bytes via eaddr(), "%a" four bytes via iaddr()
if (qualifier == 'l')
str = eaddr(str,
va_arg(args, unsigned char *),
field_width,
precision,
flags);
else
str = iaddr(str,
va_arg(args, unsigned char *),
field_width,
precision,
flags);
continue;
// Integer number formats - set up the flags and "break"
case 'o':
base = 8;
break;
case 'X':
flags |= UPPERCASE;
// fall through
case 'x':
base = 16;
break;
case 'd':
case 'i':
flags |= SIGN;
// fall through
case 'u':
break;
#if HAS_FLOAT
case 'f':
str = flt(str,
va_arg(args, double),
field_width,
precision,
*fmt,
flags | SIGN);
continue;
#endif
default:
// Unknown conversion: echo it, keeping the '%' unless it was "%%"
if (*fmt != '%')
*str++ = '%';
if (*fmt)
*str++ = *fmt;
else
--fmt; // Format ended after '%': step back so the loop stops
continue;
}
// Fetch the integer argument; only the 'l' qualifier selects a long
if (qualifier == 'l')
num = va_arg(args, unsigned long);
else if (flags & SIGN)
num = va_arg(args, int);
else
num = va_arg(args, unsigned int);
str = number(str, num, base, field_width, precision, flags);
}
*str = '\0';
return str - buf;
}
/* Output a single character of benchmark output.

   This port forwards the character to putchar().

   A direct UART implementation was left disabled in the original source and
   is kept here for reference:

       if (c == '\n')
       {
           while (io.uart.stat & 1); // uart busy, wait...
           io.uart.fifo = '\r';
       }
       while (io.uart.stat & 1); // uart busy, wait...
       io.uart.fifo = c;

   Output of a char to a UART usually follows the following model:
       Wait until UART is ready
       Write char to UART
       Wait until UART is done

   Or in code:
       while (*UART_CONTROL_ADDRESS != UART_READY);
       *UART_DATA_ADDRESS = c;
       while (*UART_CONTROL_ADDRESS != UART_READY);

   Check the UART sample code on your platform or the board documentation. */
void
uart_send_char(char c)
{
    (void)putchar(c);
}
/* printf replacement: format into a 1024-byte local buffer with
   ee_vsprintf(), then send the text one character at a time through
   uart_send_char().
   Returns the number of characters sent.
   The formatted text, terminator included, has to fit in the local buffer;
   no length check is made. */
int
ee_printf(const char *fmt, ...)
{
    char    buf[1024];
    char *  cursor;
    va_list args;
    int     sent = 0;

    va_start(args, fmt);
    ee_vsprintf(buf, fmt, args);
    va_end(args);

    for (cursor = buf; *cursor; cursor++)
    {
        uart_send_char(*cursor);
        sent++;
    }
    return sent;
}

425
FIRMWARE/DHRYSTONE/dhry.h Normal file
View File

@@ -0,0 +1,425 @@
/*
****************************************************************************
*
* "DHRYSTONE" Benchmark Program
* -----------------------------
*
* Version: C, Version 2.1
*
* File: dhry.h (part 1 of 3)
*
* Date: May 25, 1988
*
* Author: Reinhold P. Weicker
* Siemens AG, AUT E 51
* Postfach 3220
* 8520 Erlangen
* Germany (West)
* Phone: [+49]-9131-7-20330
* (8-17 Central European Time)
* Usenet: ..!mcsun!unido!estevax!weicker
*
* Original Version (in Ada) published in
* "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
* pp. 1013 - 1030, together with the statistics
* on which the distribution of statements etc. is based.
*
* In this C version, the following C library functions are used:
* - strcpy, strcmp (inside the measurement loop)
* - printf, scanf (outside the measurement loop)
* In addition, Berkeley UNIX system calls "times ()" or "time ()"
* are used for execution time measurement. For measurements
* on other systems, these calls have to be changed.
*
* Collection of Results:
* Reinhold Weicker (address see above) and
*
* Rick Richardson
* PC Research. Inc.
* 94 Apple Orchard Drive
* Tinton Falls, NJ 07724
* Phone: (201) 389-8963 (9-17 EST)
* Usenet: ...!uunet!pcrat!rick
*
* Please send results to Rick Richardson and/or Reinhold Weicker.
* Complete information should be given on hardware and software used.
* Hardware information includes: Machine type, CPU, type and size
* of caches; for microprocessors: clock frequency, memory speed
* (number of wait states).
* Software information includes: Compiler (and runtime library)
* manufacturer and version, compilation switches, OS version.
* The Operating System version may give an indication about the
* compiler; Dhrystone itself performs no OS calls in the measurement loop.
*
* The complete output generated by the program should be mailed
* such that at least some checks for correctness can be made.
*
***************************************************************************
*
* History: This version C/2.1 has been made for two reasons:
*
* 1) There is an obvious need for a common C version of
* Dhrystone, since C is at present the most popular system
* programming language for the class of processors
* (microcomputers, minicomputers) where Dhrystone is used most.
* There should be, as far as possible, only one C version of
* Dhrystone such that results can be compared without
* restrictions. In the past, the C versions distributed
* by Rick Richardson (Version 1.1) and by Reinhold Weicker
* had small (though not significant) differences.
*
* 2) As far as it is possible without changes to the Dhrystone
* statistics, optimizing compilers should be prevented from
* removing significant statements.
*
* This C version has been developed in cooperation with
* Rick Richardson (Tinton Falls, NJ), it incorporates many
* ideas from the "Version 1.1" distributed previously by
* him over the UNIX network Usenet.
* I also thank Chaim Benedelac (National Semiconductor),
* David Ditzel (SUN), Earl Killian and John Mashey (MIPS),
* Alan Smith and Rafael Saavedra-Barrera (UC at Berkeley)
* for their help with comments on earlier versions of the
* benchmark.
*
* Changes: In the initialization part, this version follows mostly
* Rick Richardson's version distributed via Usenet, not the
* version distributed earlier via floppy disk by Reinhold Weicker.
* As a concession to older compilers, names have been made
* unique within the first 8 characters.
* Inside the measurement loop, this version follows the
* version previously distributed by Reinhold Weicker.
*
* At several places in the benchmark, code has been added,
* but within the measurement loop only in branches that
* are not executed. The intention is that optimizing compilers
* should be prevented from moving code out of the measurement
* loop, or from removing code altogether. Since the statements
* that are executed within the measurement loop have NOT been
* changed, the numbers defining the "Dhrystone distribution"
* (distribution of statements, operand types and locality)
* still hold. Except for sophisticated optimizing compilers,
* execution times for this version should be the same as
* for previous versions.
*
* Since it has proven difficult to subtract the time for the
* measurement loop overhead in a correct way, the loop check
* has been made a part of the benchmark. This does have
* an impact - though a very minor one - on the distribution
* statistics which have been updated for this version.
*
* All changes within the measurement loop are described
* and discussed in the companion paper "Rationale for
* Dhrystone version 2".
*
* Because of the self-imposed limitation that the order and
* distribution of the executed statements should not be
* changed, there are still cases where optimizing compilers
* may not generate code for some statements. To a certain
* degree, this is unavoidable for small synthetic benchmarks.
* Users of the benchmark are advised to check code listings
* whether code is generated for all statements of Dhrystone.
*
* Version 2.1 is identical to version 2.0 distributed via
* the UNIX network Usenet in March 1988 except that it corrects
* some minor deficiencies that were found by users of version 2.0.
* The only change within the measurement loop is that a
* non-executed "else" part was added to the "if" statement in
* Func_3, and a non-executed "else" part removed from Proc_3.
*
***************************************************************************
*
* Defines: The following "Defines" are possible:
* -DREG=register (default: Not defined)
* As an approximation to what an average C programmer
* might do, the "register" storage class is applied
* (if enabled by -DREG=register)
* - for local variables, if they are used (dynamically)
* five or more times
* - for parameters if they are used (dynamically)
* six or more times
* Note that an optimal "register" strategy is
* compiler-dependent, and that "register" declarations
* do not necessarily lead to faster execution.
* -DNOSTRUCTASSIGN (default: Not defined)
* Define if the C compiler does not support
* assignment of structures.
* -DNOENUMS (default: Not defined)
* Define if the C compiler does not support
* enumeration types.
* -DTIMES (default)
* -DTIME
* The "times" function of UNIX (returning process times)
* or the "time" function (returning wallclock time)
* is used for measurement.
* For single user machines, "time ()" is adequate. For
* multi-user machines where you cannot get single-user
* access, use the "times ()" function. If you have
* neither, use a stopwatch in the dead of night.
* "printf"s are provided marking the points "Start Timer"
* and "Stop Timer". DO NOT use the UNIX "time(1)"
* command, as this will measure the total time to
* run this program, which will (erroneously) include
* the time to allocate storage (malloc) and to perform
* the initialization.
* -DHZ=nnn
* In Berkeley UNIX, the function "times" returns process
* time in 1/HZ seconds, with HZ = 60 for most systems.
* CHECK YOUR SYSTEM DESCRIPTION BEFORE YOU JUST APPLY
* A VALUE.
*
***************************************************************************
*
* Compilation model and measurement (IMPORTANT):
*
* This C version of Dhrystone consists of three files:
* - dhry.h (this file, containing global definitions and comments)
* - dhry_1.c (containing the code corresponding to Ada package Pack_1)
* - dhry_2.c (containing the code corresponding to Ada package Pack_2)
*
* The following "ground rules" apply for measurements:
* - Separate compilation
* - No procedure merging
* - Otherwise, compiler optimizations are allowed but should be indicated
* - Default results are those without register declarations
* See the companion paper "Rationale for Dhrystone Version 2" for a more
* detailed discussion of these ground rules.
*
* For 16-Bit processors (e.g. 80186, 80286), times for all compilation
* models ("small", "medium", "large" etc.) should be given if possible,
* together with a definition of these models for the compiler system used.
*
**************************************************************************
*
* Dhrystone (C version) statistics:
*
* [Comment from the first distribution, updated for version 2.
* Note that because of language differences, the numbers are slightly
* different from the Ada version.]
*
* The following program contains statements of a high level programming
* language (here: C) in a distribution considered representative:
*
* assignments 52 (51.0 %)
* control statements 33 (32.4 %)
* procedure, function calls 17 (16.7 %)
*
* 103 statements are dynamically executed. The program is balanced with
* respect to the three aspects:
*
* - statement type
* - operand type
* - operand locality
* operand global, local, parameter, or constant.
*
* The combination of these three aspects is balanced only approximately.
*
* 1. Statement Type:
* ----------------- number
*
* V1 = V2 9
* (incl. V1 = F(..)
* V = Constant 12
* Assignment, 7
* with array element
* Assignment, 6
* with record component
* --
* 34 34
*
* X = Y +|-|"&&"|"|" Z 5
* X = Y +|-|"==" Constant 6
* X = X +|- 1 3
* X = Y *|/ Z 2
* X = Expression, 1
* two operators
* X = Expression, 1
* three operators
* --
* 18 18
*
* if .... 14
* with "else" 7
* without "else" 7
* executed 3
* not executed 4
* for ... 7 | counted every time
* while ... 4 | the loop condition
* do ... while 1 | is evaluated
* switch ... 1
* break 1
* declaration with 1
* initialization
* --
* 34 34
*
* P (...) procedure call 11
* user procedure 10
* library procedure 1
* X = F (...)
* function call 6
* user function 5
* library function 1
* --
* 17 17
* ---
* 103
*
* The average number of parameters in procedure or function calls
* is 1.82 (not counting the function values as implicit parameters).
*
*
* 2. Operators
* ------------
* number approximate
* percentage
*
* Arithmetic 32 50.8
*
* + 21 33.3
* - 7 11.1
* * 3 4.8
* / (int div) 1 1.6
*
* Comparison 27 42.8
*
* == 9 14.3
* /= 4 6.3
* > 1 1.6
* < 3 4.8
* >= 1 1.6
* <= 9 14.3
*
* Logic 4 6.3
*
* && (AND-THEN) 1 1.6
* | (OR) 1 1.6
* ! (NOT) 2 3.2
*
* -- -----
* 63 100.1
*
*
* 3. Operand Type (counted once per operand reference):
* ---------------
* number approximate
* percentage
*
* Integer 175 72.3 %
* Character 45 18.6 %
* Pointer 12 5.0 %
* String30 6 2.5 %
* Array 2 0.8 %
* Record 2 0.8 %
* --- -------
* 242 100.0 %
*
* When there is an access path leading to the final operand (e.g. a record
* component), only the final data type on the access path is counted.
*
*
* 4. Operand Locality:
* -------------------
* number approximate
* percentage
*
* local variable 114 47.1 %
* global variable 22 9.1 %
* parameter 45 18.6 %
* value 23 9.5 %
* reference 22 9.1 %
* function result 6 2.5 %
* constant 55 22.7 %
* --- -------
* 242 100.0 %
*
*
* The program does not compute anything meaningful, but it is syntactically
* and semantically correct. All variables have a value assigned to them
* before they are used as a source operand.
*
* There has been no explicit effort to account for the effects of a
* cache, or to balance the use of long or short displacements for code or
* data.
*
***************************************************************************
*/
#pragma once
/* Compiler and system dependent definitions: */
#ifndef TIME
#define TIMES
#endif
/* Use times(2) time function unless */
/* explicitly defined otherwise */
#ifdef TIMES
#include <sys/types.h>
#include <sys/times.h>
/* for "times" */
#endif
#define Mic_secs_Per_Second 80000000.0
/* Berkeley UNIX C returns process times in seconds/HZ */
#ifdef NOSTRUCTASSIGN
#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
#else
#define structassign(d, s) d = s
#endif
#ifdef NOENUM
#define Ident_1 0
#define Ident_2 1
#define Ident_3 2
#define Ident_4 3
#define Ident_5 4
typedef int Enumeration;
#else
typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
Enumeration;
#endif
/* for boolean and enumeration types in Ada, Pascal */
/* General definitions: */
//#include <stdio.h>
/* for strcpy, strcmp */
#define Null 0
/* Value of a Null pointer */
#define true 1
#define false 0
typedef int One_Thirty;
typedef int One_Fifty;
typedef char Capital_Letter;
typedef int Boolean;
typedef char Str_30 [31];
typedef int Arr_1_Dim [50];
typedef int Arr_2_Dim [50] [50];
/* Record type used by the benchmark: a link to another record, a */
/* discriminant, and a union holding one of three variant layouts. */
/* Rec_Pointer is the matching pointer type. */
typedef struct record
{
/* Link to another record of the same type. */
struct record *Ptr_Comp;
/* Discriminant; the code visible here only ever uses variant var_1. */
Enumeration Discr;
union {
/* Variant 1: enumeration, integer and a 30-character string. */
struct {
Enumeration Enum_Comp;
int Int_Comp;
char Str_Comp [31];
} var_1;
/* Variant 2: enumeration and a 30-character string. */
struct {
Enumeration E_Comp_2;
char Str_2_Comp [31];
} var_2;
/* Variant 3: two characters. */
struct {
char Ch_1_Comp;
char Ch_2_Comp;
} var_3;
} variant;
} Rec_Type, *Rec_Pointer;

384
FIRMWARE/DHRYSTONE/dhry_1.c Normal file
View File

@@ -0,0 +1,384 @@
/*
****************************************************************************
*
* "DHRYSTONE" Benchmark Program
* -----------------------------
*
* Version: C, Version 2.1
*
* File: dhry_1.c (part 2 of 3)
*
* Date: May 25, 1988
*
* Author: Reinhold P. Weicker
*
****************************************************************************
*/
#include "dhry.h"
#include <stdint.h>
/* Global Variables: */
/* Shared state of the benchmark; Int_Glob and Ch_1_Glob are also */
/* referenced from dhry_2.c through extern declarations. */
Rec_Pointer Ptr_Glob,
Next_Ptr_Glob;
int Int_Glob;
Boolean Bool_Glob;
char Ch_1_Glob,
Ch_2_Glob;
int Arr_1_Glob [50];
int Arr_2_Glob [50] [50];
Enumeration Func_1 ();
/* forward declaration necessary since Enumeration may not simply be int */
/* Reg records, for the report printed by main, whether REG was already */
/* defined by the build when this file was compiled. */
#ifndef REG
Boolean Reg = false;
#define REG
/* REG becomes defined as empty */
/* i.e. no register variables */
#else
Boolean Reg = true;
#endif
/* variables for time measurement: */
/* rdcycle() and rdinstret() are defined elsewhere; main samples both */
/* right before and right after the benchmark loop. */
extern uint64_t rdcycle();
extern uint64_t rdinstret();
uint64_t Begin_Time,
End_Time,
User_Time;
uint64_t Begin_Insn,
End_Insn,
User_Insn;
/* end of variables for time measurement */
/*
 * main program, corresponds to procedures Main and Proc_0 in the Ada version.
 *
 * Sets up the two records (pre-allocated locals instead of malloc()),
 * runs the benchmark loop Number_Of_Runs times between two
 * rdcycle()/rdinstret() samples, then prints the final values of the
 * benchmark variables next to their expected values, followed by
 * cycles-per-instruction, Dhrystones per second per MHz and DMIPS per MHz.
 *
 * Nothing is printed between the start and stop samples, so console
 * output is not part of the measured interval: the ">>> Begin_..." lines
 * are emitted right after the timer is stopped.
 *
 * Returns 0.
 */
int main ()
{
    One_Fifty Int_1_Loc;
    REG One_Fifty Int_2_Loc;
    One_Fifty Int_3_Loc;
    REG char Ch_Index;
    Enumeration Enum_Loc;
    Str_30 Str_1_Loc;
    Str_30 Str_2_Loc;
    REG int Run_Index;
    REG int Number_Of_Runs;
    Rec_Type R1,R2;

    /* Initializations */

    /*
     * FEMTOSOC/FEMTORV32 modifications ===========================
     */
    /*
     * Since there are only two calls to malloc(), and that malloc()
     * is not supported yet by femtosoc lib, I replaced them with
     * pre-allocated structures.
     */
    Next_Ptr_Glob = &R1; // (Rec_Pointer) malloc (sizeof (Rec_Type));
    Ptr_Glob = &R2; // (Rec_Pointer) malloc (sizeof (Rec_Type));
    /*
     * End of FEMTOSOC/FEMTORV32 modifications ======================
     */

    Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
    Ptr_Glob->Discr = Ident_1;
    Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
    Ptr_Glob->variant.var_1.Int_Comp = 40;
    strcpy (Ptr_Glob->variant.var_1.Str_Comp,
            "DHRYSTONE PROGRAM, SOME STRING");
    strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");

    Arr_2_Glob [8][7] = 10;
    /* Was missing in published program. Without this statement, */
    /* Arr_2_Glob [8][7] would have an undefined value. */
    /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
    /* overflow may occur for this array element. */

    printf ("\n");
    printf ("Dhrystone Benchmark, Version 2.1 (Language: C)\n");
    printf ("\n");
    if (Reg)
    {
        printf ("Program compiled with 'register' attribute\n");
        printf ("\n");
    }
    else
    {
        printf ("Program compiled without 'register' attribute\n");
        printf ("\n");
    }
    printf ("Please give the number of runs through the benchmark: ");
    {
        /* The run count is hard-coded; no input is read. */
        // int n;
        // scanf ("%d", &n);
        Number_Of_Runs = 50000;
    }
    printf ("\n");
    printf ("Execution starts, %d runs through Dhrystone\n", Number_Of_Runs);

    /***************/
    /* Start timer */
    /***************/
    Begin_Time = rdcycle();
    Begin_Insn = rdinstret();

    for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
    {
        Proc_5();
        Proc_4();
        /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
        Int_1_Loc = 2;
        Int_2_Loc = 3;
        strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
        Enum_Loc = Ident_2;
        Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
        /* Bool_Glob == 1 */
        while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
        {
            Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
            /* Int_3_Loc == 7 */
            Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
            /* Int_3_Loc == 7 */
            Int_1_Loc += 1;
        } /* while */
        /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
        Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
        /* Int_Glob == 5 */
        Proc_1 (Ptr_Glob);
        for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
        /* loop body executed twice */
        {
            if (Enum_Loc == Func_1 (Ch_Index, 'C'))
            /* then, not executed */
            {
                Proc_6 (Ident_1, &Enum_Loc);
                strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
                Int_2_Loc = Run_Index;
                Int_Glob = Run_Index;
            }
        }
        /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
        Int_2_Loc = Int_2_Loc * Int_1_Loc;
        Int_1_Loc = Int_2_Loc / Int_3_Loc;
        Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
        /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
        Proc_2 (&Int_1_Loc);
        /* Int_1_Loc == 5 */
    } /* loop "for Run_Index" */

    /**************/
    /* Stop timer */
    /**************/
    End_Time = rdcycle();
    End_Insn = rdinstret();

    /* Reported only now so that these prints are not timed. */
    printf(">>> Begin_time=%d\n", (int)Begin_Time);
    printf(">>> Begin_insn=%d\n", (int)Begin_Insn);

    printf ("Execution ends\n");
    printf ("\n");
    printf ("Final values of the variables used in the benchmark:\n");
    printf ("\n");
    printf ("Int_Glob: %d\n", Int_Glob);
    printf (" should be: %d\n", 5);
    printf ("Bool_Glob: %d\n", Bool_Glob);
    printf (" should be: %d\n", 1);
    printf ("Ch_1_Glob: %c\n", Ch_1_Glob);
    printf (" should be: %c\n", 'A');
    printf ("Ch_2_Glob: %c\n", Ch_2_Glob);
    printf (" should be: %c\n", 'B');
    printf ("Arr_1_Glob[8]: %d\n", Arr_1_Glob[8]);
    printf (" should be: %d\n", 7);
    printf ("Arr_2_Glob[8][7]: %d\n", Arr_2_Glob[8][7]);
    printf (" should be: Number_Of_Runs + 10\n");
    printf ("Ptr_Glob->\n");
    printf (" Ptr_Comp: %d\n", (int) Ptr_Glob->Ptr_Comp);
    printf (" should be: (implementation-dependent)\n");
    printf (" Discr: %d\n", Ptr_Glob->Discr);
    printf (" should be: %d\n", 0);
    printf (" Enum_Comp: %d\n", Ptr_Glob->variant.var_1.Enum_Comp);
    printf (" should be: %d\n", 2);
    printf (" Int_Comp: %d\n", Ptr_Glob->variant.var_1.Int_Comp);
    printf (" should be: %d\n", 17);
    printf (" Str_Comp: %s\n", Ptr_Glob->variant.var_1.Str_Comp);
    printf (" should be: DHRYSTONE PROGRAM, SOME STRING\n");
    printf ("Next_Ptr_Glob->\n");
    printf (" Ptr_Comp: %d\n", (int) Next_Ptr_Glob->Ptr_Comp);
    printf (" should be: (implementation-dependent), same as above\n");
    printf (" Discr: %d\n", Next_Ptr_Glob->Discr);
    printf (" should be: %d\n", 0);
    printf (" Enum_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
    printf (" should be: %d\n", 1);
    printf (" Int_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
    printf (" should be: %d\n", 18);
    printf (" Str_Comp: %s\n",
            Next_Ptr_Glob->variant.var_1.Str_Comp);
    printf (" should be: DHRYSTONE PROGRAM, SOME STRING\n");
    printf ("Int_1_Loc: %d\n", Int_1_Loc);
    printf (" should be: %d\n", 5);
    printf ("Int_2_Loc: %d\n", Int_2_Loc);
    printf (" should be: %d\n", 13);
    printf ("Int_3_Loc: %d\n", Int_3_Loc);
    printf (" should be: %d\n", 7);
    printf ("Enum_Loc: %d\n", Enum_Loc);
    printf (" should be: %d\n", 1);
    printf ("Str_1_Loc: %s\n", Str_1_Loc);
    printf (" should be: DHRYSTONE PROGRAM, 1'ST STRING\n");
    printf ("Str_2_Loc: %s\n", Str_2_Loc);
    printf (" should be: DHRYSTONE PROGRAM, 2'ND STRING\n");
    printf ("\n");

    User_Time = End_Time - Begin_Time;
    User_Insn = End_Insn - Begin_Insn;
    printf("Number_Of_Runs: %d\n", Number_Of_Runs);
    printf("User_Time: %d cycles, %d insn\n", (int)User_Time, (int)User_Insn);

    /* CPI scaled by 1000 so that three decimals can be printed with */
    /* integer arithmetic only; 0 is reported if no instruction retired. */
    uint64_t Cycles_Per_Instruction_x1000 =
        (User_Insn != 0) ? (1000 * User_Time) / User_Insn : 0;
    printf("Cycles_Per_Instruction: %d.%d%d%d\n",
           (int)( Cycles_Per_Instruction_x1000 / 1000),
           (int)((Cycles_Per_Instruction_x1000 / 100 ) % 10),
           (int)((Cycles_Per_Instruction_x1000 / 10 ) % 10),
           (int)((Cycles_Per_Instruction_x1000 / 1 ) % 10)
    );
    show_CPI_2();

    /* runs per million cycles == runs per second at 1 MHz; */
    /* 0 is reported if the cycle counter did not advance. */
    uint64_t Dhrystones_Per_Second_Per_MHz =
        (User_Time != 0) ? ((uint64_t)Number_Of_Runs * 1000000) / User_Time : 0;
    printf("Dhrystones_Per_Second_Per_MHz: %d\n", (int)Dhrystones_Per_Second_Per_MHz);

    /*
     * "Another common representation of the Dhrystone benchmark is the DMIPS (Dhrystone MIPS) obtained
     * when the Dhrystone score is divided by 1757 (the number of Dhrystones per second obtained on the
     * VAX 11/780, nominally a 1 MIPS machine)."
     */
    int DMIPS_Per_MHz_x1000 = ((uint64_t)1000 * Dhrystones_Per_Second_Per_MHz) / 1757;
    printf("DMIPS_Per_MHz: %d.%d%d%d\n",
           (int)(DMIPS_Per_MHz_x1000 / 1000),
           (int)((DMIPS_Per_MHz_x1000 / 100) % 10),
           (int)((DMIPS_Per_MHz_x1000 / 10) % 10),
           (int)((DMIPS_Per_MHz_x1000 / 1) % 10));
    return 0;
}
/* Proc_1: record manipulation step of the benchmark. */
/* Copies *Ptr_Glob into the record that Ptr_Val_Par links to, stores 5 in */
/* Int_Comp of both records, lets Proc_3 update the linked record's */
/* Ptr_Comp, and then, when the linked record's Discr is Ident_1, updates */
/* its Int_Comp, Enum_Comp and Ptr_Comp through Proc_6 and Proc_7. */
/* K&R-style definition with implicit int return type; no value is returned. */
Proc_1 (Ptr_Val_Par)
/******************/
REG Rec_Pointer Ptr_Val_Par;
/* executed once */
{
REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
/* == Ptr_Glob_Next */
/* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
/* corresponds to "rename" in Ada, "with" in Pascal */
structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob);
Ptr_Val_Par->variant.var_1.Int_Comp = 5;
Next_Record->variant.var_1.Int_Comp
= Ptr_Val_Par->variant.var_1.Int_Comp;
Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
Proc_3 (&Next_Record->Ptr_Comp);
/* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
== Ptr_Glob->Ptr_Comp */
if (Next_Record->Discr == Ident_1)
/* then, executed */
{
Next_Record->variant.var_1.Int_Comp = 6;
Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp,
&Next_Record->variant.var_1.Enum_Comp);
Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
Proc_7 (Next_Record->variant.var_1.Int_Comp, 10,
&Next_Record->variant.var_1.Int_Comp);
}
else /* not executed */
structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
} /* Proc_1 */
/* Proc_2: when Ch_1_Glob is 'A', replaces *Int_Par_Ref with */
/* (*Int_Par_Ref + 10 - 1) - Int_Glob. */
/* Enum_Loc is assigned only inside the 'A' branch, so the do/while exit */
/* test reads an unassigned variable if Ch_1_Glob is anything else; the */
/* benchmark relies on Proc_5 having stored 'A' beforehand. */
Proc_2 (Int_Par_Ref)
/******************/
/* executed once */
/* *Int_Par_Ref == 1, becomes 4 */
One_Fifty *Int_Par_Ref;
{
One_Fifty Int_Loc;
Enumeration Enum_Loc;
Int_Loc = *Int_Par_Ref + 10;
do /* executed once */
if (Ch_1_Glob == 'A')
/* then, executed */
{
Int_Loc -= 1;
*Int_Par_Ref = Int_Loc - Int_Glob;
Enum_Loc = Ident_1;
} /* if */
while (Enum_Loc != Ident_1); /* true */
} /* Proc_2 */
/* Proc_3: if Ptr_Glob is not Null, stores Ptr_Glob->Ptr_Comp into */
/* *Ptr_Ref_Par; then has Proc_7 write 10 + 2 + Int_Glob into Ptr_Glob's */
/* Int_Comp. The Proc_7 call is outside the if and dereferences Ptr_Glob */
/* unconditionally. */
Proc_3 (Ptr_Ref_Par)
/******************/
/* executed once */
/* Ptr_Ref_Par becomes Ptr_Glob */
Rec_Pointer *Ptr_Ref_Par;
{
if (Ptr_Glob != Null)
/* then, executed */
*Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
} /* Proc_3 */
/* Proc_4: ORs (Ch_1_Glob == 'A') into Bool_Glob with the bitwise | */
/* operator and sets Ch_2_Glob to 'B'. */
Proc_4 () /* without parameters */
/*******/
/* executed once */
{
Boolean Bool_Loc;
Bool_Loc = Ch_1_Glob == 'A';
Bool_Glob = Bool_Loc | Bool_Glob;
Ch_2_Glob = 'B';
} /* Proc_4 */
/* Proc_5: resets the globals for one run: Ch_1_Glob = 'A', */
/* Bool_Glob = false. */
Proc_5 () /* without parameters */
/*******/
/* executed once */
{
Ch_1_Glob = 'A';
Bool_Glob = false;
} /* Proc_5 */
/* Procedure for the assignment of structures, */
/* if the C compiler doesn't support this feature */
/* Only compiled when NOSTRUCTASSIGN is defined. Copies l bytes from s to d */
/* one byte at a time, in ascending address order. K&R-style definition */
/* with implicit int return type; no value is returned. */
#ifdef NOSTRUCTASSIGN
memcpy (d, s, l)
register char *d;
register char *s;
register int l;
{
while (l--) *d++ = *s++;
}
#endif

192
FIRMWARE/DHRYSTONE/dhry_2.c Normal file
View File

@@ -0,0 +1,192 @@
/*
****************************************************************************
*
* "DHRYSTONE" Benchmark Program
* -----------------------------
*
* Version: C, Version 2.1
*
* File: dhry_2.c (part 3 of 3)
*
* Date: May 25, 1988
*
* Author: Reinhold P. Weicker
*
****************************************************************************
*/
#include "dhry.h"
/* REG is the storage-class prefix used on selected locals in this file; */
/* unless the build already defines it, it expands to nothing. */
#ifndef REG
#define REG
/* REG becomes defined as empty */
/* i.e. no register variables */
#endif
/* Globals defined in dhry_1.c. */
extern int Int_Glob;
extern char Ch_1_Glob;
/* Proc_6: maps Enum_Val_Par to a result stored in *Enum_Ref_Par. */
/* The result starts as Enum_Val_Par (or Ident_4 when Func_3 rejects the */
/* value) and is then overridden by the switch for every case except */
/* Ident_4, which leaves it as set before the switch. */
Proc_6 (Enum_Val_Par, Enum_Ref_Par)
/*********************************/
/* executed once */
/* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
Enumeration Enum_Val_Par;
Enumeration *Enum_Ref_Par;
{
*Enum_Ref_Par = Enum_Val_Par;
if (! Func_3 (Enum_Val_Par))
/* then, not executed */
*Enum_Ref_Par = Ident_4;
switch (Enum_Val_Par)
{
case Ident_1:
*Enum_Ref_Par = Ident_1;
break;
case Ident_2:
if (Int_Glob > 100)
/* then */
*Enum_Ref_Par = Ident_1;
else *Enum_Ref_Par = Ident_4;
break;
case Ident_3: /* executed */
*Enum_Ref_Par = Ident_2;
break;
case Ident_4: break;
case Ident_5:
*Enum_Ref_Par = Ident_3;
break;
} /* switch */
} /* Proc_6 */
/* Proc_7: stores Int_1_Par_Val + 2 + Int_2_Par_Val into *Int_Par_Ref. */
Proc_7 (Int_1_Par_Val, Int_2_Par_Val, Int_Par_Ref)
/**********************************************/
/* executed three times */
/* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */
/* Int_Par_Ref becomes 7 */
/* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
/* Int_Par_Ref becomes 17 */
/* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
/* Int_Par_Ref becomes 18 */
One_Fifty Int_1_Par_Val;
One_Fifty Int_2_Par_Val;
One_Fifty *Int_Par_Ref;
{
One_Fifty Int_Loc;
Int_Loc = Int_1_Par_Val + 2;
*Int_Par_Ref = Int_2_Par_Val + Int_Loc;
} /* Proc_7 */
/* Proc_8: array access step of the benchmark. */
/* With Int_Loc = Int_1_Par_Val + 5 it writes elements Int_Loc, Int_Loc+1 */
/* and Int_Loc+30 of the one-dimensional array, row Int_Loc and element */
/* [Int_Loc+20][Int_Loc] of the two-dimensional array, increments */
/* [Int_Loc][Int_Loc-1], and finally sets Int_Glob to 5. */
/* No bounds are checked: Int_Loc+30 must stay below 50. */
Proc_8 (Arr_1_Par_Ref, Arr_2_Par_Ref, Int_1_Par_Val, Int_2_Par_Val)
/*********************************************************************/
/* executed once */
/* Int_Par_Val_1 == 3 */
/* Int_Par_Val_2 == 7 */
Arr_1_Dim Arr_1_Par_Ref;
Arr_2_Dim Arr_2_Par_Ref;
int Int_1_Par_Val;
int Int_2_Par_Val;
{
REG One_Fifty Int_Index;
REG One_Fifty Int_Loc;
Int_Loc = Int_1_Par_Val + 5;
Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val;
Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc];
Arr_1_Par_Ref [Int_Loc+30] = Int_Loc;
for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index)
Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc;
Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1;
Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc];
Int_Glob = 5;
} /* Proc_8 */
/* Func_1: returns Ident_1 when the two characters differ; when they are */
/* equal it stores the character in Ch_1_Glob and returns Ident_2. */
Enumeration Func_1 (Ch_1_Par_Val, Ch_2_Par_Val)
/*************************************************/
/* executed three times */
/* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */
/* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */
/* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */
Capital_Letter Ch_1_Par_Val;
Capital_Letter Ch_2_Par_Val;
{
Capital_Letter Ch_1_Loc;
Capital_Letter Ch_2_Loc;
Ch_1_Loc = Ch_1_Par_Val;
Ch_2_Loc = Ch_1_Loc;
if (Ch_2_Loc != Ch_2_Par_Val)
/* then, executed */
return (Ident_1);
else /* not executed */
{
Ch_1_Glob = Ch_1_Loc;
return (Ident_2);
}
} /* Func_1 */
/* Func_2: string comparison step of the benchmark. */
/* Compares character [2] of the first string with character [3] of the */
/* second through Func_1 until they differ, then returns true when Ch_Loc */
/* is 'R' or when strcmp orders the first string after the second (also */
/* storing Int_Loc + 7 in Int_Glob in that case), and false otherwise. */
/* Ch_Loc is assigned only inside the loop's if-branch; the while loop */
/* cannot exit without taking that branch, since only it advances Int_Loc. */
Boolean Func_2 (Str_1_Par_Ref, Str_2_Par_Ref)
/*************************************************/
/* executed once */
/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
Str_30 Str_1_Par_Ref;
Str_30 Str_2_Par_Ref;
{
REG One_Thirty Int_Loc;
Capital_Letter Ch_Loc;
Int_Loc = 2;
while (Int_Loc <= 2) /* loop body executed once */
if (Func_1 (Str_1_Par_Ref[Int_Loc],
Str_2_Par_Ref[Int_Loc+1]) == Ident_1)
/* then, executed */
{
Ch_Loc = 'A';
Int_Loc += 1;
} /* if, while */
if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
/* then, not executed */
Int_Loc = 7;
if (Ch_Loc == 'R')
/* then, not executed */
return (true);
else /* executed */
{
if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
/* then, not executed */
{
Int_Loc += 7;
Int_Glob = Int_Loc;
return (true);
}
else /* executed */
return (false);
} /* if Ch_Loc */
} /* Func_2 */
/* Func_3: returns true when Enum_Par_Val is Ident_3, false otherwise. */
Boolean Func_3 (Enum_Par_Val)
/***************************/
/* executed once */
/* Enum_Par_Val == Ident_3 */
Enumeration Enum_Par_Val;
{
Enumeration Enum_Loc;
Enum_Loc = Enum_Par_Val;
if (Enum_Loc == Ident_3)
/* then, executed */
return (true);
else /* not executed */
return (false);
} /* Func_3 */

View File

@@ -0,0 +1,56 @@
#include <stdint.h>
#include <perf.h>
// Returns the current cycle count as reported by rdcycle().
uint64_t time() {
    const uint64_t now = rdcycle();
    return now;
}
// Returns the current retired-instruction count as reported by rdinstret().
uint64_t insn() {
    const uint64_t retired = rdinstret();
    return retired;
}
// Copies the NUL-terminated string src, terminator included, into dest.
// Returns dest. The destination must be large enough; nothing is checked.
char *strcpy(char *dest, const char *src) {
    char *out = dest;
    for (;;) {
        const char c = *src++;
        *out++ = c;
        if (c == '\0') {
            break;
        }
    }
    return dest;
}
// Compares two NUL-terminated strings byte by byte as unsigned chars.
// Returns the difference between the first pair of bytes that differ
// (negative, zero or positive); zero means the strings are equal.
int strcmp (const char *p1, const char *p2) {
    const unsigned char *a = (const unsigned char *) p1;
    const unsigned char *b = (const unsigned char *) p2;
    for (;; ++a, ++b) {
        const unsigned char ca = *a;
        const unsigned char cb = *b;
        // Stop at the end of the first string or at the first mismatch.
        if (ca == '\0' || ca != cb) {
            return ca - cb;
        }
    }
}
/*************************************************************/
// Prints kx/1000 as a decimal number with exactly three fractional digits:
// integer part, a dot, then the remainder padded on the left with zeros.
// Only plain "%d" conversions are used.
void printk(uint64_t kx) {
    const int whole = (int)(kx / 1000);
    const int thousandths = (int)(kx % 1000);
    printf("%d.", whole);
    if (thousandths < 10) {
        printf("00");
    } else if (thousandths < 100) {
        printf("0");
    }
    printf("%d", thousandths);
}
// Prints the cycles-per-instruction ratio computed from the current
// rdinstret()/rdcycle() values (the instruction counter is sampled
// first), followed by both raw counters truncated to int.
void show_CPI_2() {
    const uint64_t retired = rdinstret();
    const uint64_t elapsed = rdcycle();
    const uint64_t cpi_x1000 = elapsed*1000/retired;
    printf(">>> CPI =");
    printk(cpi_x1000);
    printf("\n");
    printf(">>> instret = %d\n", (int)(retired));
    printf(">>> cycles = %d\n", (int)(elapsed));
}

460
FIRMWARE/GL_tty.h Normal file
View File

@@ -0,0 +1,460 @@
/**
* ansi_graphics.h
* A couple of functions to display graphics in the terminal,
* using ansi sequences.
* Bruno Levy, Jan 2024
*/
#include <stdio.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
// Target frame rate; GL_swapbuffers() sleeps 1/GL_FPS seconds on Linux.
#ifndef GL_FPS
#define GL_FPS 30
#endif
#if defined(__linux__) || defined(_WIN32) || defined(__APPLE__)
#define BIGCPU // we are compiling for a real machine
#else
#define TINYCPU // we are compiling for a softcore
#endif
#ifdef __linux__
#include <unistd.h> // for usleep()
#endif
// You can define GL_width and GL_height before
// #including ansi_graphics.h in case the plain
// old 80x25 pixels does not suffice.
#ifndef GL_width
#define GL_width 80
#endif
#ifndef GL_height
#define GL_height 25
#endif
/**
 * \brief Moves the terminal cursor (the current graphics position)
 * \param[in] x column, typically in 0,79
 * \param[in] y row, typically in 0,24
 * \details Emits the ANSI "ESC [ row ; column H" sequence with y as
 *  the row and x as the column.
 *  NOTE(review): ANSI cursor addressing is 1-based, so x=0 and x=1
 *  (and likewise for y) presumably land on the same cell; confirm
 *  whether callers rely on this.
 */
static inline void GL_gotoxy(int x, int y) {
    const int row = y;
    const int column = x;
    printf("\033[%d;%dH", row, column);
}
/**
 * \brief Draws one "pixel" at the current cursor position
 * \param[in] R , G , B the RGB color of the pixel, in [0..255]
 * \details Sets the background color and prints one space, which
 *  moves the cursor to the next cell. Typically used by programs that
 *  draw all pixels sequentially, like a raytracer. After each line,
 *  one can either printf("\n") or call GL_gotoxy(). To draw individual
 *  pixels in an arbitrary order, use GL_setpixelRGB(x,y,R,G,B)
 */
static inline void GL_setpixelRGBhere(uint8_t R, uint8_t G, uint8_t B) {
    const int red = (int)R;
    const int green = (int)G;
    const int blue = (int)B;
    // 24-bit background color followed by a single space
    printf("\033[48;2;%d;%d;%dm ", red, green, blue);
}
/**
 * \brief Draws two vertically stacked "pixels" in the character cell
 *  at the current cursor position and advances the cursor.
 * \param[in] r1 , g1 , b1 color of the upper pixel (cell background)
 * \param[in] r2 , g2 , b2 color of the lower pixel (glyph foreground)
 * \details Characters are roughly twice as high as wide. To get
 *  square pixels, one cell shows two colors: the background color and
 *  a partial-block glyph drawn in the foreground color. When both
 *  colors are equal, a plain colored space is printed instead.
 *  NOTE(review): the bytes E2 96 83 encode U+2583 (lower three
 *  eighths block); U+2584 (E2 96 84) is the exact lower half block.
 *  Confirm which glyph is intended.
 */
static inline void GL_set2pixelsRGBhere(
    uint8_t r1, uint8_t g1, uint8_t b1,
    uint8_t r2, uint8_t g2, uint8_t b2
) {
    const int same_color = (r1 == r2) && (g1 == g2) && (b1 == b2);
    if (same_color) {
        GL_setpixelRGBhere(r1, g1, b1);
        return;
    }
    printf("\033[48;2;%d;%d;%dm", (int)r1, (int)g1, (int)b1);   // upper: background
    printf("\033[38;2;%d;%d;%dm", (int)r2, (int)g2, (int)b2);   // lower: foreground
    // UTF-8 encoded block glyph, see
    // https://www.w3.org/TR/xml-entity-names/025.html
    printf("\xE2\x96\x83");
}
// Builds the string "R;G;B" from three literal tokens by stringizing them,
// e.g. GL_RGB(255,0,0) gives "255;0;0". The result has the form the
// colormap functions GL_setpixelIhere() / GL_set2pixelsIhere() paste into
// their color escape sequences. The arguments are stringized, not
// evaluated, so they must be literals.
#define GL_RGB(R,G,B) #R ";" #G ";" #B
/**
 * \brief Draws one colormapped "pixel" at the current cursor position
 * \param[in] cmap array of "R;G;B" strings
 * \param[in] c index of the color in \p cmap (not range-checked)
 * \details Sets the background color and prints one space.
 */
static inline void GL_setpixelIhere(
    const char** cmap, int c
) {
    const char* rgb = cmap[c];
    printf("\033[48;2;%sm ", rgb);
}
/**
 * \brief Draws two vertically stacked colormapped "pixels" in the
 *  character cell at the current cursor position.
 * \param[in] cmap array of "R;G;B" strings
 * \param[in] c1 colormap index of the upper pixel (cell background)
 * \param[in] c2 colormap index of the lower pixel (glyph foreground)
 * \details When both indices are equal a plain colored space is
 *  printed; otherwise a partial-block glyph is drawn in the second
 *  color over a background in the first color.
 */
static inline void GL_set2pixelsIhere(
    const char** cmap, int c1, int c2
) {
    if (c1 == c2) {
        GL_setpixelIhere(cmap, c1);
        return;
    }
    const char* upper = cmap[c1];
    const char* lower = cmap[c2];
    printf("\033[48;2;%sm", upper);
    printf("\033[38;2;%sm", lower);
    // UTF-8 encoded block glyph, see
    // https://www.w3.org/TR/xml-entity-names/025.html
    printf("\xE2\x96\x83");
}
/**
 * \brief Ends the current row of pixels.
 * \details Foreground, then background, are set to black before the
 *  line feed is emitted.
 */
static inline void GL_newline() {
    printf(
        "\033[38;2;0;0;0m"      // foreground black
        "\033[48;2;0;0;0m\n"    // background black, next line
    );
}
/**
 * \brief Draws one pixel at a given position
 * \param[in] x typically in 0,79
 * \param[in] y typically in 0,24
 * \param[in] R , G , B the RGB color of the pixel, in [0..255]
 * \details Moves the cursor with GL_gotoxy(), then draws with
 *  GL_setpixelRGBhere().
 */
static inline void GL_setpixelRGB(
    int x, int y, uint8_t R, uint8_t G, uint8_t B
) {
    GL_gotoxy(x, y);                // position first...
    GL_setpixelRGBhere(R, G, B);    // ...then paint the cell
}
/**
 * \brief Restores the default colors: black background (palette
 *  entry 16) and white foreground (palette entry 15).
 */
static inline void GL_restore_default_colors() {
    printf("\033[48;5;16m");    // background: black
    printf("\033[38;5;15m");    // foreground: white
}
/**
 * \brief Clears the screen; call it each time graphics should be erased.
 * \details Default colors are restored first, then the ANSI "erase
 *  display" sequence is sent.
 */
static inline void GL_clear() {
    GL_restore_default_colors();
    printf("\033[2J");
}
/**
 * \brief Moves the cursor (current drawing position) to the top-left
 *  corner.
 * \see GL_setpixelRGBhere() and GL_set2pixelsRGBhere()
 */
static inline void GL_home() {
    printf("\033[H");   // ANSI "cursor home"
}
/**
 * \brief Prepares the terminal for drawing. Call it before drawing
 *  graphics, or each time graphics should be cleared.
 * \details Hides the cursor, homes it, then clears the screen.
 */
static inline void GL_init() {
    printf("\033[?25l");    // cursor off
    GL_home();
    GL_clear();
}
/**
 * \brief Call this function at the end of the program.
 * \details Restores the default colors, moves the cursor to column 0
 *  of row GL_height and makes the cursor visible again.
 */
static inline void GL_terminate() {
    GL_restore_default_colors();
    GL_gotoxy(0, GL_height);
    printf("\033[?25h");    // cursor on
}
/**
 * \brief Flushes pending graphic operations and waits a bit
 * \details stdout is flushed only when BIGCPU is defined (a real
 *  machine with true stdio support); on the small MCU the io lib is
 *  not buffered, so there is nothing to flush. The 1/GL_FPS second
 *  pause is only compiled in on Linux.
 */
static inline void GL_swapbuffers() {
#if defined(BIGCPU)
    fflush(stdout);
#endif
#if defined(__linux__)
    usleep(1000000/GL_FPS);
#endif
}
// Per-pixel callbacks ("shaders") used by GL_scan_RGB() and GL_scan_RGBf():
// given the pixel coordinates (x,y), the callback writes the pixel's color
// through the three output pointers, as bytes or as floats.
typedef void (*GL_pixelfunc_RGB)(int x, int y, uint8_t* r, uint8_t* g, uint8_t* b);
typedef void (*GL_pixelfunc_RGBf)(int x, int y, float* r, float* g, float* b);
/**
 * \brief Draws an image by calling a user-specified function for each pixel.
 * \param[in] width , height dimension of the image in square pixels
 * \param[in] do_pixel the user function to be called for each pixel
 *   (a "shader"), that determines the (integer) components r,g,b of
 *   the pixel's color.
 * \details Uses half-character pixels: each character cell shows image
 *  rows j (upper) and j+1 (lower). If \p height is odd, the last cell
 *  row has no lower image row; the shader is not called for it and the
 *  lower half is drawn black, so \p do_pixel only ever sees
 *  0 <= y < height.
 */
static inline void GL_scan_RGB(
    int width, int height, GL_pixelfunc_RGB do_pixel
) {
    uint8_t r1, g1, b1;
    uint8_t r2, g2, b2;
    GL_home();
    for (int j = 0; j < height; j += 2) {
        const int has_lower_row = (j + 1 < height);
        for (int i = 0; i < width; i++) {
            do_pixel(i, j, &r1, &g1, &b1);
            if (has_lower_row) {
                do_pixel(i, j + 1, &r2, &g2, &b2);
            } else {
                // odd height: nothing below the last image row
                r2 = 0;
                g2 = 0;
                b2 = 0;
            }
            GL_set2pixelsRGBhere(r1, g1, b1, r2, g2, b2);
            if (i == width - 1) {
                GL_newline();
            }
        }
    }
}
/**
 * \brief Converts a floating point value to a byte.
 * \param[in] f the floating point value in [0,1]
 * \return the byte, in [0,255]
 * \details the input value is clamped to [0,1]; NaN is mapped to 0, so
 *  the float-to-integer conversion never sees an out-of-range value.
 */
static inline uint8_t GL_ftoi(float f) {
    // "not greater than zero" is also true for NaN
    if (!(f > 0.0f)) {
        return 0;
    }
    if (f >= 1.0f) {
        return 255;
    }
    return (uint8_t)(255.0f * f);
}
/**
 * \brief Draws an image by calling a user-specified function for each pixel.
 * \param[in] width , height dimension of the image in square pixels
 * \param[in] do_pixel the user function to be called for each pixel
 *   (a "shader"), that determines the (floating-point) components
 *   fr,fg,fb of the pixel's color.
 * \details Uses half-character pixels: each character cell shows image
 *  rows j (upper) and j+1 (lower). Components are converted to bytes
 *  with GL_ftoi(). If \p height is odd, the last cell row has no lower
 *  image row; the shader is not called for it and the lower half is
 *  drawn black, so \p do_pixel only ever sees 0 <= y < height.
 */
static inline void GL_scan_RGBf(
    int width, int height, GL_pixelfunc_RGBf do_pixel
) {
    float fr1, fg1, fb1;
    float fr2, fg2, fb2;
    uint8_t r1, g1, b1;
    uint8_t r2, g2, b2;
    GL_home();
    for (int j = 0; j < height; j += 2) {
        const int has_lower_row = (j + 1 < height);
        for (int i = 0; i < width; i++) {
            do_pixel(i, j, &fr1, &fg1, &fb1);
            r1 = GL_ftoi(fr1);
            g1 = GL_ftoi(fg1);
            b1 = GL_ftoi(fb1);
            if (has_lower_row) {
                do_pixel(i, j + 1, &fr2, &fg2, &fb2);
                r2 = GL_ftoi(fr2);
                g2 = GL_ftoi(fg2);
                b2 = GL_ftoi(fb2);
            } else {
                // odd height: nothing below the last image row
                r2 = 0;
                g2 = 0;
                b2 = 0;
            }
            GL_set2pixelsRGBhere(r1, g1, b1, r2, g2, b2);
            if (i == width - 1) {
                GL_newline();
            }
        }
    }
}
/***************************************************************/
// Region codes for Cohen-Sutherland clipping: one bit per side of the
// clipping window on which a point lies outside; 0 means inside.
#define INSIDE 0
#define LEFT 1
#define RIGHT 2
#define BOTTOM 4
#define TOP 8
// Clipping window: the whole GL_width x GL_height screen.
#define XMIN 0
#define XMAX (GL_width-1)
#define YMIN 0
#define YMAX (GL_height-1)
// Region code of point (x,y). BOTTOM is y < YMIN and TOP is y > YMAX.
// The expansion is fully parenthesized so that it can be combined with
// other operators, e.g. code(x,y) & TOP.
#define code(x,y) \
    (((x) < XMIN) | (((x) > XMAX)<<1) | (((y) < YMIN)<<2) | (((y) > YMAX)<<3))
/***************************************************************/
/**
 * \brief Draws a line segment, clipped to the screen.
 * \param[in] x1 , y1 first extremity
 * \param[in] x2 , y2 second extremity
 * \param[in] R , G , B the RGB color of the line
 * \details The segment is first clipped against
 *  [XMIN,XMAX] x [YMIN,YMAX] (Cohen-Sutherland); nothing is drawn if
 *  it lies entirely outside. The clipped segment is then rasterized
 *  with an integer error-accumulation loop (Bresenham), one
 *  GL_setpixelRGB() call per pixel.
 */
static inline void GL_line(
int x1, int y1, int x2, int y2, int R, int G, int B
) {
// NOTE(review): sx is declared but never used in this function.
int x,y,dx,dy,sx,sy,tmp;
/* Cohen-Sutherland line clipping. */
int code1 = code(x1,y1);
int code2 = code(x2,y2);
int codeout;
for(;;) {
/* Both points inside. */
if(code1 == 0 && code2 == 0) {
break;
}
/* No point inside. */
/* (both points are outside on a common side: trivially rejected) */
if(code1 & code2) {
return;
}
/* One of the points is outside. */
codeout = code1 ? code1 : code2;
/* Compute intersection. */
/* (of the segment with the window border crossed by that point) */
if (codeout & TOP) {
x = x1 + (x2 - x1) * (YMAX - y1) / (y2 - y1);
y = YMAX;
} else if (codeout & BOTTOM) {
x = x1 + (x2 - x1) * (YMIN - y1) / (y2 - y1);
y = YMIN;
} else if (codeout & RIGHT) {
y = y1 + (y2 - y1) * (XMAX - x1) / (x2 - x1);
x = XMAX;
} else if (codeout & LEFT) {
y = y1 + (y2 - y1) * (XMIN - x1) / (x2 - x1);
x = XMIN;
}
/* Replace outside point with intersection. */
if (codeout == code1) {
x1 = x;
y1 = y;
code1 = code(x1,y1);
} else {
x2 = x;
y2 = y;
code2 = code(x2,y2);
}
}
// Swap both extremities to ensure x increases
if(x2 < x1) {
tmp = x2;
x2 = x1;
x1 = tmp;
tmp = y2;
y2 = y1;
y1 = tmp;
}
// Bresenham line drawing.
// sy is the direction of the y steps, dy becomes |y2 - y1|.
dy = y2 - y1;
sy = 1;
if(dy < 0) {
sy = -1;
dy = -dy;
}
dx = x2 - x1;
x = x1;
y = y1;
if(dy > dx) {
// Steep line: one step in y per iteration, ex is the error term
// that decides when x advances.
int ex = (dx << 1) - dy;
for(int u=0; u<dy; u++) {
GL_setpixelRGB(x,y,R,G,B);
y += sy;
if(ex >= 0) {
x++;
ex -= dy << 1;
GL_setpixelRGB(x,y,R,G,B);
}
// NOTE(review): this loop prints a plain space instead of calling
// GL_setpixelRGB(); presumably it relies on the cursor already being
// next to the pixel just drawn -- confirm.
while(ex >= 0) {
x++;
ex -= dy << 1;
putchar(' ');
}
ex += dx << 1;
}
} else {
// Shallow line: one step in x per iteration, ey is the error term
// that decides when y advances.
int ey = (dy << 1) - dx;
for(int u=0; u<dx; u++) {
GL_setpixelRGB(x,y,R,G,B);
x++;
while(ey >= 0) {
y += sy;
ey -= dx << 1;
GL_setpixelRGB(x,y,R,G,B);
}
ey += dy << 1;
}
}
}
/***************************************************************/
#ifdef GL_USE_TURTLE
#include "sintab.h" // Ugly !!!
// State of a drawing turtle: position, heading, pen color and pen state.
// Turtle_init() puts a turtle at the screen center, heading -90 degrees,
// with a white pen that is down.
typedef struct {
int x; // in [0..79]
int y; // in [0..24]
int angle; // in degrees
int R,G,B; // pen color
int pendown; // draw if non-zero
} Turtle;
// Resets a turtle: screen center, heading -90 degrees, pen down, white pen.
static inline void Turtle_init(Turtle* T) {
    // position and heading
    T->x = GL_width/2;
    T->y = GL_height/2;
    T->angle = -90;
    // pen state and color
    T->pendown = 1;
    T->R = T->G = T->B = 255;
}
// Lifts the pen: subsequent moves do not draw.
static inline void Turtle_pen_up(Turtle* T) {
    T->pendown = 0;     // zero means "do not draw"
}
// Lowers the pen: subsequent moves draw a line.
static inline void Turtle_pen_down(Turtle* T) {
    T->pendown = 1;     // non-zero means "draw"
}
// Sets the pen color used for lines drawn by subsequent moves.
static inline void Turtle_pen_color(Turtle* T, int R, int G, int B) {
    T->R = R;   // red
    T->G = G;   // green
    T->B = B;   // blue
}
// Moves the turtle by `distance` along its current heading and, if the
// pen is down, draws a line from the old to the new position in the pen
// color. A negative distance moves backwards.
// The heading is reduced to [0,359] before it indexes costab/sintab, so
// headings that are exact multiples of 360 map to entry 0 and never to
// entry 360. The tables are indexed in degrees and their values are
// divided by 256 here (presumably scaled by 256 in sintab.h -- confirm).
static inline void Turtle_forward(Turtle* T, int distance) {
    const int last_x = T->x;
    const int last_y = T->y;
    int a = T->angle % 360;     // in (-360,360), sign follows T->angle
    if (a < 0) {
        a += 360;
    }
    T->x += (costab[a] * distance) / 256;
    T->y += (sintab[a] * distance) / 256;
    if (T->pendown) {
        GL_line(last_x, last_y, T->x, T->y, T->R, T->G, T->B);
    }
}
// Moves the turtle backwards: a forward move by the negated distance.
static inline void Turtle_backward(Turtle* T, int distance) {
    const int reversed = -distance;
    Turtle_forward(T, reversed);
}
// Turns the turtle right by delta_angle degrees (adds to the heading).
// The heading is not normalized here.
static inline void Turtle_turn_right(Turtle* T, int delta_angle) {
    T->angle = T->angle + delta_angle;
}
// Turns the turtle left by delta_angle degrees: a right turn by the
// negated angle.
static inline void Turtle_turn_left(Turtle* T, int delta_angle) {
    const int opposite = -delta_angle;
    Turtle_turn_right(T, opposite);
}
#endif

80
FIRMWARE/Makefile Normal file
View File

@@ -0,0 +1,80 @@
# Firmware build rules. Names such as RVLD, RVOBJCOPY, RVOBJDUMP, ARCH, ABI,
# FIRMWARE_DIR, RV_BINARIES and the toolchain library directories are not
# set here; presumably they come from the included makefile.inc.
include ../../../FIRMWARE/makefile.inc
RVASFLAGS=-march=$(ARCH) -mabi=$(ABI)
RVCFLAGS=-I. -O2 -fno-pic -march=$(ARCH) -mabi=$(ABI) -fno-stack-protector -w -Wl,--no-relax
# Passed to firmware_words as both -ram and -max_addr for the BRAM image.
RAM_SIZE=6144
# Support objects linked into every firmware image.
LIBOBJECTS=putchar.o wait.o print.o memcpy.o errno.o perf.o
# BRAM: everything linked with bram.ld
%.bram.elf: %.o start.o $(LIBOBJECTS) $(RV_BINARIES)
$(RVLD) -T bram.ld -m elf32lriscv -nostdlib -norelax $< $(LIBOBJECTS) $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@
# Converts an ELF into a .hex image and copies it to ../firmware.hex and
# ../obj_dir/firmware.hex; the image name is recorded in ../firmware.txt
%.hex: %.elf $(FIRMWARE_DIR)/TOOLS/firmware_words
$(FIRMWARE_DIR)/TOOLS/firmware_words $< -ram $(RAM_SIZE) -max_addr $(RAM_SIZE) -out $@
cp $@ ../firmware.hex
mkdir -p ../obj_dir
cp $@ ../obj_dir/firmware.hex
echo $@ > ../firmware.txt
# SPI FLASH 0 (sends everything to SPI flash)
%.spiflash0.elf: %.o start.o $(LIBOBJECTS) $(RV_BINARIES)
$(RVLD) -T spiflash0.ld -m elf32lriscv -nostdlib -norelax $< $(LIBOBJECTS) $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@
%.spiflash0.bin: %.spiflash0.elf
$(RVOBJCOPY) $< $@ -O binary
# The .prog targets write the binary to the flash with iceprog at offset 128k
%.spiflash0.prog: %.spiflash0.bin
iceprog -o 128k $<
# SPI FLASH 1 (sends code and variables initialization to SPI flash, variables to RAM)
%.spiflash1.elf: %.o start_spiflash1.o $(LIBOBJECTS) $(RV_BINARIES)
$(RVLD) -T spiflash1.ld -m elf32lriscv -nostdlib -norelax $< $(LIBOBJECTS) $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@
%.spiflash1.bin: %.spiflash1.elf
$(RVOBJCOPY) $< $@ -O binary
%.spiflash1.prog: %.spiflash1.bin
iceprog -o 128k $<
# SPI FLASH 2 (sends code and variables initialization to SPI flash, variables and fastcode to RAM)
# This variant is also linked against libm (-lm).
%.spiflash2.elf: %.o start_spiflash1.o $(LIBOBJECTS) $(RV_BINARIES)
$(RVLD) -T spiflash2.ld -m elf32lriscv -nostdlib -norelax $< $(LIBOBJECTS) -L$(RVTOOLCHAIN_LIB_DIR) -lm $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@
%.spiflash2.bin: %.spiflash2.elf
$(RVOBJCOPY) $< $@ -O binary
%.spiflash2.prog: %.spiflash2.bin
iceprog -o 128k $<
# Disassembly listing with numeric register names
%.spiflash2.list: %.spiflash2.elf
$(RVOBJDUMP) -Mnumeric -D $< > $@
# DUAL MEMORY (64 kb program ROM, 64 kb data RAM)
# The link step also writes a disassembly listing next to the ELF.
%.pipeline.elf: %.o start_pipeline.o $(LIBOBJECTS) $(RV_BINARIES)
$(RVLD) -T pipeline.ld -m elf32lriscv -nostdlib -norelax $< $(LIBOBJECTS) -L$(RVTOOLCHAIN_LIB_DIR) -lm $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@
$(RVOBJDUMP) -Mnumeric -D $@ > $@.list
# Program ROM image: addresses 0 to 0xFFFF of the ELF
%.PROGROM.hex: %.pipeline.elf $(FIRMWARE_DIR)/TOOLS/firmware_words
$(FIRMWARE_DIR)/TOOLS/firmware_words $< -ram 0x20000 -max_addr 0x20000 -out $@ -from_addr 0 -to_addr 0xFFFF
cp $@ ../PROGROM.hex
mkdir -p ../obj_dir
cp $@ ../obj_dir/PROGROM.hex
# Data RAM image: addresses 0x10000 to 0x1FFFF of the ELF
%.DATARAM.hex: %.pipeline.elf $(FIRMWARE_DIR)/TOOLS/firmware_words
$(FIRMWARE_DIR)/TOOLS/firmware_words $< -ram 0x20000 -max_addr 0x20000 -out $@ -from_addr 0x10000 -to_addr 0x1FFFF
cp $@ ../DATARAM.hex
mkdir -p ../obj_dir
cp $@ ../obj_dir/DATARAM.hex
# Builds both images and records the name in ../firmware.txt
%.pipeline.hex: %.PROGROM.hex %.DATARAM.hex
echo $@ > ../firmware.txt

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

480
FIRMWARE/ST_NICCC.c Normal file
View File

@@ -0,0 +1,480 @@
/*
* Reading the ST-NICCC megademo data stored in
* the SPI flash and streaming it to polygons,
* rendered as ANSI character sequences through
* the UART.
*
* The polygon stream is a 640K file, that needs
* to be stored in the SPI flash, using:
* ICEStick: iceprog -o 1M EXAMPLES/DATA/scene1.dat
* ULX3S: cp EXAMPLES/DATA/scene1.dat scene1.img
* ujprog -j flash -f 1048576 scene1.img
* (using latest version of ujprog compiled from https://github.com/kost/fujprog)
*
* More details and links in EXAMPLES/DATA/notes.txt
*/
#include <stdint.h>
#include <stdio.h>
#ifdef __linux__
#include <stdlib.h>
#include <unistd.h>
#else
#include "io.h"
#endif
// when compiling for SPI flash, uncomment to fit some routines in fast BRAM
// (but it does not change much, the bottleneck is ANSI RGB encoding and uart).
//#define RV32_FASTCODE __attribute((section(".fastcode")))
// With the definition below RV32_FASTCODE expands to nothing, so the
// functions tagged with it get no special section.
#define RV32_FASTCODE
// when compiling for SPI flash, uncomment to enable wireframe mode (but it is ugly
// and it will not fit in BRAM !)
// #define WITH_WIREFRAME
#ifdef WITH_WIREFRAME
// Run-time switch: non-zero draws polygon outlines instead of filling them.
int wireframe = 0;
#endif
// Classic min/max macros: each argument may be evaluated twice, so do not
// pass expressions with side effects.
#define MIN(x,y) ((x) < (y) ? (x) : (y))
#define MAX(x,y) ((x) > (y) ? (x) : (y))
/**********************************************************************************/
/* Graphics routines */
/**********************************************************************************/
// Map coordinates from file to screen
// Horizontal: file x in [0,255] is halved (rounded down) to [0,127].
static inline uint8_t map_x(uint8_t x) {
    return (uint8_t)(x / 2);
}
// Vertical: file y in [0,255] is divided by four (rounded down) to [0,63].
static inline uint8_t map_y(uint8_t y) {
    return (uint8_t)(y / 4);
}
// Clears the screen after selecting a black background (palette entry 16).
void GL_clear() {
    printf("\033[48;5;16m");    // set background color black
    printf("\033[2J");          // clear screen
}
/*
* Set background color using 6x6x6 colorcube codes
* see https://stackoverflow.com/questions/4842424/list-of-ansi-color-escape-sequences
*/
/*
 * Selects the background colour for subsequent output (256-colour escape
 * sequence). The last requested colour is remembered so that repeated
 * requests for the same colour send nothing over the (slow) UART.
 */
static inline void GL_setcolor(int color) {
    static int current = -1;   /* -1: no colour has been sent yet */
    if(current == color) {
        return;
    }
    printf("\033[48;5;%dm",color);
    current = color;
}
// Draws one "pixel": moves the cursor to row y, column x and prints a space,
// which shows up in the currently selected background colour.
static inline void GL_setpixel(int x, int y) {
printf("\033[%d;%dH ",y,x); // Goto_XY(x,y) and print space
}
#ifdef WITH_WIREFRAME
// Draws a line from (x1,y1) to (x2,y2) with an integer Bresenham walk, one
// GL_setpixel() (cursor move + space) per plotted cell. Only compiled when
// WITH_WIREFRAME is defined.
void GL_line(int x1, int y1, int x2, int y2) RV32_FASTCODE;
void GL_line(int x1, int y1, int x2, int y2) {
int x,y,dx,dy,sy,tmp;
// Swap both extremities to ensure x increases
if(x2 < x1) {
tmp = x2;
x2 = x1;
x1 = tmp;
tmp = y2;
y2 = y1;
y1 = tmp;
}
// Bresenham line drawing.
// After the swap dx >= 0; sy carries the direction of y, dy its magnitude.
dy = y2 - y1;
sy = 1;
if(dy < 0) {
sy = -1;
dy = -dy;
}
dx = x2 - x1;
x = x1;
y = y1;
if(dy > dx) {
// Steep line: one step in y per iteration, x advances when the error
// term ex becomes non-negative.
int ex = (dx << 1) - dy;
for(int u=0; u<dy; u++) {
GL_setpixel(x,y);
y += sy;
if(ex >= 0) {
x++;
ex -= dy << 1;
GL_setpixel(x,y);
}
// Any further x steps on the same row just print spaces: the cursor is
// already right after the previously drawn cell.
while(ex >= 0) {
x++;
ex -= dy << 1;
putchar(' ');
}
ex += dx << 1;
}
} else {
// Shallow line: one step in x per iteration, y advances when the error
// term ey becomes non-negative.
int ey = (dy << 1) - dx;
for(int u=0; u<dx; u++) {
GL_setpixel(x,y);
x++;
while(ey >= 0) {
y += sy;
ey -= dx << 1;
GL_setpixel(x,y);
}
ey += dy << 1;
}
}
}
#endif
/*
 * Fills a polygon with spaces in the current background colour.
 *
 * nb_pts: number of vertices.
 * points: 2*nb_pts ints, xi = points[2*i], yi = points[2*i+1], in screen
 *         coordinates. Both are used as indices into 128-entry scanline
 *         tables, so the caller must keep them in 0..127.
 *
 * Works in three passes: (1) accumulate a winding sum and the vertical
 * extent, (2) walk every edge with a Bresenham loop and record, per
 * scanline, the x where the span starts (x_left) or ends (x_right),
 * (3) print one run of spaces per scanline.
 * When WITH_WIREFRAME is compiled in and `wireframe` is set, the edges are
 * drawn with GL_line() instead and nothing is filled.
 */
void GL_fillpoly(int nb_pts, int* points) RV32_FASTCODE;
void GL_fillpoly(int nb_pts, int* points) {
    char x_left[128];
    char x_right[128];

    /* Determine clockwise, miny, maxy */
    int clockwise = 0;
    int miny =  256;
    int maxy = -256;

    for(int i1=0; i1<nb_pts; ++i1) {
        int i2=(i1==nb_pts-1) ? 0 : i1+1;
        int i3=(i2==nb_pts-1) ? 0 : i2+1;
        int x1 = points[2*i1];
        int y1 = points[2*i1+1];
        int dx1 = points[2*i2]   - x1;
        int dy1 = points[2*i2+1] - y1;
        int dx2 = points[2*i3]   - x1;
        int dy2 = points[2*i3+1] - y1;
        /* cross product of two consecutive edge vectors; the sign of the
           sum gives the orientation of the polygon */
        clockwise += dx1 * dy2 - dx2 * dy1;
        miny = MIN(miny,y1);
        maxy = MAX(maxy,y1);
    }

    /* Determine x_left and x_right for each scanline */
    for(int i1=0; i1<nb_pts; ++i1) {
        int i2=(i1==nb_pts-1) ? 0 : i1+1;

        int x1 = points[2*i1];
        int y1 = points[2*i1+1];
        int x2 = points[2*i2];
        int y2 = points[2*i2+1];

#ifdef WITH_WIREFRAME
        if(wireframe) {
            if((clockwise > 0) ^ (y2 > y1)) {
                GL_line(x1,y1,x2,y2);
            }
            continue;
        }
#endif

        /* orientation combined with the edge's vertical direction selects
           which side of the span this edge bounds */
        char* x_buffer = ((clockwise > 0) ^ (y2 > y1)) ? x_left : x_right;
        int dx = x2 - x1;
        int sx = 1;
        int dy = y2 - y1;
        int sy = 1;
        int x = x1;
        int y = y1;
        int ex;

        if(dx < 0) {
            sx = -1;
            dx = -dx;
        }

        if(dy < 0) {
            sy = -1;
            dy = -dy;
        }

        /* horizontal edge: it bounds its scanline on both sides */
        if(y1 == y2) {
            x_left[y1]  = MIN(x1,x2);
            x_right[y1] = MAX(x1,x2);
            continue;
        }

        /* Bresenham walk, one table entry per scanline (both ends included) */
        ex = (dx << 1) - dy;

        for(int u=0; u <= dy; ++u) {
            x_buffer[y] = x;
            y += sy;
            while(ex >= 0) {
                x += sx;
                ex -= dy << 1;
            }
            ex += dx << 1;
        }
    }

#ifdef WITH_WIREFRAME
    if(!wireframe)
#endif
    {
        for(int y = miny; y <= maxy; ++y) {
            int x1 = x_left[y];
            int x2 = x_right[y];
            printf("\033[%d;%dH",y,x1); // Goto_XY(x1,y)
            for(int x=x1; x<x2; ++x) {
                putchar(' ');
            }
        }
    }
}
/**********************************************************************************/
/*
* Starting address of data stream stored in the
* SPI.
* I put the data stream starting from 1M offset,
* just to make sure it does not collide with
* FPGA wiring configuration ! (but FPGA configuration
* only takes a few tens of kilobytes I think).
* Using the IO interface, it is using the physical address
* (starting at 1M). Using the mapped memory interface,
* SPI_FLASH_BASE is mapped to 1M.
*/
uint32_t spi_addr = 0;
/*
* Word address and cached word used in mapped mode
*/
uint32_t spi_word_addr = 0;
union {
uint32_t spi_word;
uint8_t spi_bytes[4];
} spi_u;
/*
 * Offset (1 MB) of the polygon stream. Parenthesised so that the macro
 * expands safely inside any expression (e.g. next to '/', '%' or a cast).
 */
#define ADDR_OFFSET (1024*1024)
/*
 * Restarts reading from the beginning of the stream.
 */
void spi_reset() {
    spi_addr = ADDR_OFFSET;
    /* no valid word address equals (uint32_t)-1, so the next call to
       next_spi_byte() is forced to fetch a fresh word */
    spi_word_addr = (uint32_t)(-1);
}
#ifdef __linux__
FILE* f = NULL;
/**
* Reads one byte of data from the file (emulates read_spi_byte() when running on desktop)
*/
// Desktop variant: returns the byte at spi_addr and advances spi_addr by one.
// The data file is opened on first use; bytes are served from a cached
// 4-byte word that is re-read only when spi_addr moves to another word.
// Exits the process if the file cannot be opened.
uint8_t next_spi_byte() {
uint8_t result;
if(f == NULL) {
f = fopen("../../../FIRMWARE/EXAMPLES/DATA/scene1.dat","rb");
if(f == NULL) {
printf("Could not open data file\n");
exit(-1);
}
}
if(spi_word_addr != spi_addr >> 2) {
spi_word_addr = spi_addr >> 2;
// spi_addr counts from ADDR_OFFSET, the file starts at 0
fseek(f, spi_word_addr*4-ADDR_OFFSET, SEEK_SET);
// NOTE(review): the fread() result is not checked; on a short read the
// cached word keeps (part of) its previous content.
fread(&(spi_u.spi_word), 4, 1, f);
}
result = spi_u.spi_bytes[spi_addr&3];
++spi_addr;
return (uint8_t)(result);
}
#else
# define SPI_FLASH_BASE ((uint32_t*)(1 << 23))
/**
* Reads one byte from the SPI flash, using the mapped SPI flash interface.
*/
// Returns the byte at spi_addr and advances spi_addr by one. A 32-bit word is
// fetched through SPI_FLASH_BASE only when spi_addr enters a new word; the
// following bytes of that word are served from the cached copy in spi_u.
static inline uint8_t next_spi_byte() {
uint8_t result;
if(spi_word_addr != spi_addr >> 2) {
spi_word_addr = spi_addr >> 2;
spi_u.spi_word = SPI_FLASH_BASE[spi_word_addr];
}
// byte selected by the two low address bits (memory order of the word)
result = spi_u.spi_bytes[spi_addr&3];
++spi_addr;
return (uint8_t)(result);
}
#endif
/*
 * Reads a 16-bit value from the stream. The ST-NICCC file stores words in
 * big endian format (see DATA/scene_description.txt), so the first byte
 * read is the most significant one.
 */
static inline uint16_t next_spi_word() {
    uint16_t value = (uint16_t)next_spi_byte();
    value = (uint16_t)(value << 8);
    value |= (uint16_t)next_spi_byte();
    return value;
}
/*
 * The colormap, encoded in such a way that it
 * can be directly sent as ANSI color codes.
 */
int cmap[16];
/*
 * Current frame's vertices coordinates (if frame is indexed),
 * already mapped to screen coordinates by map_x() / map_y().
 * Sized 256 so that every possible 8-bit vertex index read from
 * the stream stays in bounds (index 255 included).
 */
uint8_t X[256];
uint8_t Y[256];
/*
 * Current polygon vertices, as expected
 * by GL_fillpoly():
 * xi = poly[2*i], yi = poly[2*i+1]
 * (a polygon descriptor holds a 4-bit vertex count, hence 2*15 entries)
 */
int poly[30];
/*
* Masks for frame flags.
*/
#define CLEAR_BIT 1
#define PALETTE_BIT 2
#define INDEXED_BIT 4
/*
* Reads a frame's polygonal description from
* SPI flash and rasterizes the polygons using
* FemtoGL.
* returns 0 if last frame.
* See DATA/scene_description.txt for the
* ST-NICCC file format.
* See DATA/test_ST_NICCC.c for an example
* program.
*/
// Decodes one frame from the stream and draws its polygons.
// Returns 0 when the end-of-stream marker (0xfd) is met, 1 otherwise.
int read_frame() RV32_FASTCODE;
int read_frame() {
uint8_t frame_flags = next_spi_byte();
// Update palette data.
if(frame_flags & PALETTE_BIT) {
// 16-bit mask: bit b set means that entry 15-b of the palette follows
uint16_t colors = next_spi_word();
for(int b=15; b>=0; --b) {
if(colors & (1 << b)) {
int rgb = next_spi_word();
// Get the three 3-bits per component R,G,B
int b3 = (rgb & 0x007);
int g3 = (rgb & 0x070) >> 4;
int r3 = (rgb & 0x700) >> 8;
// Re-encode them as ANSI 8-bits color
// (0..7 rescaled to 0..5, then index into the 6x6x6 cube that starts at 16)
b3 = b3 * 6 / 8;
g3 = g3 * 6 / 8;
r3 = r3 * 6 / 8;
cmap[15-b] = 16 + b3 + 6*(g3 + 6*r3);
}
}
}
// The clear request is deliberately ignored (call commented out).
if(frame_flags & CLEAR_BIT) {
// GL_clear();
}
// Update vertices
if(frame_flags & INDEXED_BIT) {
uint8_t nb_vertices = next_spi_byte();
for(int v=0; v<nb_vertices; ++v) {
X[v] = map_x(next_spi_byte());
Y[v] = map_y(next_spi_byte());
}
}
// Draw frame's polygons
for(;;) {
uint8_t poly_desc = next_spi_byte();
// Special polygon codes (end of frame,
// seek next block, end of stream)
if(poly_desc == 0xff) {
break; // end of frame
}
if(poly_desc == 0xfe) {
// Go to next 64kb block
// (rounding is done on the stream-relative address, hence the
// subtraction and re-addition of ADDR_OFFSET)
spi_addr -= ADDR_OFFSET;
spi_addr &= ~65535;
spi_addr += 65536;
spi_addr += ADDR_OFFSET;
return 1;
}
if(poly_desc == 0xfd) {
return 0; // end of stream
}
// low nibble: number of vertices, high nibble: palette entry
uint8_t nvrtx = poly_desc & 15;
uint8_t poly_col = poly_desc >> 4;
for(int i=0; i<nvrtx; ++i) {
if(frame_flags & INDEXED_BIT) {
// vertex given as an index into the per-frame vertex table
uint8_t index = next_spi_byte();
poly[2*i] = X[index];
poly[2*i+1] = Y[index];
} else {
// vertex given inline as an (x,y) byte pair
poly[2*i] = map_x(next_spi_byte());
poly[2*i+1] = map_y(next_spi_byte());
}
}
GL_setcolor(cmap[poly_col]);
GL_fillpoly(nvrtx,poly);
}
return 1;
}
// Plays the animation in an endless loop: rewinds the stream, draws frames
// until read_frame() reports the end of the stream, then starts over.
// Never returns.
int main() {
// printf("\x1B[?25l"); // hide cursor
#ifndef __linux__
IO_OUT(IO_LEDS,15);
#endif
printf("starting\n");
#ifdef WITH_WIREFRAME
wireframe = 0;
#endif
int frame = 0;
GL_clear();
for(;;) {
spi_reset();
frame = 0;
while(read_frame()) {
#ifdef WITH_WIREFRAME
// wireframe frames are not filled, so the previous frame must be erased
if(wireframe) {
GL_clear();
}
#endif
#ifdef __linux__
// desktop: pace the animation
usleep(20000);
#else
// device: show the frame counter on the LEDs
IO_OUT(IO_LEDS,frame);
#endif
++frame;
}
#ifdef WITH_WIREFRAME
// alternate between filled and wireframe rendering on each pass
wireframe = !wireframe;
#endif
}
}

21
FIRMWARE/blinker.S Normal file
View File

@@ -0,0 +1,21 @@
# Simple blinker
# Alternates the LED register between 5 and 10 forever, calling the external
# routine `wait` after each write.
.equ IO_BASE, 0x400000
.equ IO_LEDS, 4
.section .text
.globl main
# main: never returns.
# NOTE(review): IO_BASE is defined but not used here; the stores go through
# gp, which presumably is set to IO_BASE by the startup code - confirm.
main:
.L0:
li t0, 5
sw t0, IO_LEDS(gp)
call wait
li t0, 10
sw t0, IO_LEDS(gp)
call wait
j .L0

13
FIRMWARE/bram.ld Normal file
View File

@@ -0,0 +1,13 @@
/* A single memory region holds the whole program (code and data). */
MEMORY
{
BRAM (RWX) : ORIGIN = 0x0000, LENGTH = 0x1800 /* 6kB RAM */
}
/* One output section named "everything", placed in BRAM. */
SECTIONS
{
everything :
{
. = ALIGN(4);
/* .text of start.o is listed first, so it is placed first in the section */
start.o (.text)
/* then every input section whose name starts with a dot */
*(.*)
} >BRAM
}

7
FIRMWARE/dhrystones.c Normal file
View File

@@ -0,0 +1,7 @@
#define RISCV
#define TIME
#define USE_MYSTDLIB
#include "DHRYSTONE/dhry_1.c"
#include "DHRYSTONE/dhry_2.c"
#include "DHRYSTONE/stubs.c"

182
FIRMWARE/donut.c Normal file
View File

@@ -0,0 +1,182 @@
// donut.c by Andy Sloane (@a1k0n)
// https://gist.github.com/a1k0n/8ea6516b4946ab36348fb61703dc3194
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <math.h>
#define WITH_RV32M
#define debug(...)
//#define debug printf
// torus radii and distance from camera
// these are pretty baked-in to other constants now, so it probably won't work
// if you change them too much.
const int dz = 5, r1 = 1, r2 = 2;
// "Magic circle algorithm"? DDA? I've seen this formulation in a few places;
// first in Hal Chamberlain's Musical Applications of Microprocessors, but not
// sure what to call it, or how to justify it theoretically. It seems to
// correctly rotate around a point "near" the origin, without losing magnitude
// over long periods of time, as long as there are enough bits of precision in x
// and y. I use 14 bits here.
#define R(s,x,y) x-=(y>>s); y+=(x>>s)
// CORDIC algorithm to find magnitude of |x,y| by rotating the x,y vector onto
// the x axis. This also brings vector (x2,y2) along for the ride, and writes
// back to x2 -- this is used to rotate the lighting vector from the normal of
// the torus surface towards the camera, and thus determine the lighting amount.
// We only need to keep one of the two lighting normal coordinates.
// Returns an approximation of the length of (x,y), obtained by rotating the
// vector onto the x axis with 8 CORDIC shift-and-add steps. The vector
// (*x2_, y2) undergoes the same rotations; its resulting x component is
// written back through x2_. Both results are scaled by 0.625 (1/2 + 1/8).
int length_cordic(int16_t x, int16_t y, int16_t *x2_, int16_t y2) {
    int carried = *x2_;
    // mirror into the right half-plane first
    if (x < 0) {
        x = -x;
        carried = -carried;
    }
    for (int shift = 0; shift < 8; ++shift) {
        const int x_before = x;
        const int carried_before = carried;
        if (y >= 0) {
            // vector is above the axis (or on it): rotate one way
            x += y >> shift;
            y -= x_before >> shift;
            carried += y2 >> shift;
            y2 -= carried_before >> shift;
        } else {
            // vector is below the axis: rotate the other way
            x -= y >> shift;
            y += x_before >> shift;
            carried -= y2 >> shift;
            y2 += carried_before >> shift;
        }
    }
    // 0.625 is a cheap stand-in for the 0.607 scaling factor introduced by
    // the rotations (see https://en.wikipedia.org/wiki/CORDIC)
    *x2_ = (carried >> 1) + (carried >> 3);
    return (x >> 1) + (x >> 3);
}
// Renders the rotating torus forever as ASCII art: 23 rows of 79 characters
// per frame. Each character is found by marching a ray in fixed point until
// it either hits the surface or travels too far. Never returns.
void main() {
// high-precision rotation directions, sines and cosines and their products
int16_t sB = 0, cB = 16384;
int16_t sA = 11583, cA = 11583;
int16_t sAsB = 0, cAsB = 0;
int16_t sAcB = 11583, cAcB = 11583;
for (;;) {
// NOTE(review): x1_16 is never read afterwards.
int x1_16 = cAcB << 2;
// yes this is a multiply but dz is 5 so it's (sb + (sb<<2)) >> 6 effectively
int p0x = dz * sB >> 6;
int p0y = dz * sAcB >> 6;
int p0z = -dz * cAcB >> 6;
const int r1i = r1*256;
const int r2i = r2*256;
// per-frame statistics, printed below the picture
int niters = 0;
int nnormals = 0;
int16_t yincC = (cA >> 6) + (cA >> 5); // 12*cA >> 8;
int16_t yincS = (sA >> 6) + (sA >> 5); // 12*sA >> 8;
int16_t xincX = (cB >> 7) + (cB >> 6); // 6*cB >> 8;
int16_t xincY = (sAsB >> 7) + (sAsB >> 6); // 6*sAsB >> 8;
int16_t xincZ = (cAsB >> 7) + (cAsB >> 6); // 6*cAsB >> 8;
int16_t ycA = -((cA >> 1) + (cA >> 4)); // -12 * yinc1 = -9*cA >> 4;
int16_t ysA = -((sA >> 1) + (sA >> 4)); // -12 * yinc2 = -9*sA >> 4;
//int dmin = INT_MAX, dmax = -INT_MAX;
// one iteration per text row
for (int j = 0; j < 23; j++, ycA += yincC, ysA += yincS) {
int xsAsB = (sAsB >> 4) - sAsB; // -40*xincY
int xcAsB = (cAsB >> 4) - cAsB; // -40*xincZ;
int16_t vxi14 = (cB >> 4) - cB - sB; // -40*xincX - sB;
int16_t vyi14 = ycA - xsAsB - sAcB;
int16_t vzi14 = ysA + xcAsB + cAcB;
// one iteration per character; the ray direction is stepped incrementally
for (int i = 0; i < 79; i++, vxi14 += xincX, vyi14 -= xincY, vzi14 += xincZ) {
int t = 512; // (256 * dz) - r2i - r1i;
int16_t px = p0x + (vxi14 >> 5); // assuming t = 512, t*vxi>>8 == vxi<<1
int16_t py = p0y + (vyi14 >> 5);
int16_t pz = p0z + (vzi14 >> 5);
debug("pxyz (%+4d,%+4d,%+4d)\n", px, py, pz);
int16_t lx0 = sB >> 2;
int16_t ly0 = sAcB - cA >> 2;
int16_t lz0 = -cAcB - sA >> 2;
// march along the ray: d is the distance estimate at the current point,
// t the distance travelled so far
for (;;) {
int t0, t1, t2, d;
int16_t lx = lx0, ly = ly0, lz = lz0;
debug("[%2d,%2d] (px, py) = (%d, %d), (lx, ly) = (%d, %d) -> ", j, i, px, py, lx, ly);
t0 = length_cordic(px, py, &lx, ly);
debug("t0=%d (lx', ly') = (%d, %d)\n", t0, lx, ly);
t1 = t0 - r2i;
t2 = length_cordic(pz, t1, &lz, lx);
d = t2 - r1i;
t += d;
if (t > 8*256) {
// travelled too far: background
putchar(' ');
break;
} else if (d < 2) {
// close enough to the surface: pick a character from the rotated
// light component, clamped to the 12 available shades
int N = lz >> 9;
putchar(".,-~:;!*=#$@"[N > 0 ? N < 12 ? N : 11 : 0]);
nnormals++;
break;
}
// todo: shift and add version of this
/*
if (d < dmin) dmin = d;
if (d > dmax) dmax = d;
*/
// advance the point by d along the ray direction
#ifdef WITH_RV32M
px += d*vxi14 >> 14;
py += d*vyi14 >> 14;
pz += d*vzi14 >> 14;
#else
{
// 11x1.14 fixed point 3x parallel multiply
// only 16 bit registers needed; starts from highest bit to lowest
// d is about 2..1100, so 11 bits are sufficient
int16_t dx = 0, dy = 0, dz = 0;
int16_t a = vxi14, b = vyi14, c = vzi14;
while (d) {
if (d&1024) {
dx += a;
dy += b;
dz += c;
}
d = (d&1023) << 1;
a >>= 1;
b >>= 1;
c >>= 1;
}
// we already shifted down 10 bits, so get the last four
px += dx >> 4;
py += dy >> 4;
pz += dz >> 4;
}
#endif
niters++;
}
}
// end of row
puts("");
}
printf("%d iterations %d lit pixels\x1b[K", niters, nnormals);
// fflush(stdout);
// rotate sines, cosines, and products thereof
// this animates the torus rotation about two axes
R(5, cA, sA);
R(5, cAsB, sAsB);
R(5, cAcB, sAcB);
R(6, cB, sB);
R(6, cAcB, cAsB);
R(6, sAcB, sAsB);
// usleep(15000);
// back to the first row: carriage return, then cursor up 23 lines
printf("\r\x1b[23A");
}
}

427
FIRMWARE/donut2.c Normal file
View File

@@ -0,0 +1,427 @@
// donut.c by Andy Sloane (@a1k0n)
// https://gist.github.com/a1k0n/8ea6516b4946ab36348fb61703dc3194
// Bruno Levy: added ANSI "pseudo-graphics", and RISC-V statistics
#define CPU_NAME "TordBoyau ULX3S" // Name of your CPU and FPGA board
#define MHZ 95 // Frequency (without a timer we cannot guess)
#define USE_MUL // Define if you support RV32M
// #define PRECISE // Define for a more accurate result (but it costs a bit)
#define START_FRAMES 20 // Number of frames without display
// (for accurate CPI/MIPS measurements)
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <math.h>
// 0 15 31 47 63 79 96 112 127 143 159 175 191 207 223 240 255
const char* colormap[34] = {
"0",
"8;5;232",
"8;5;233",
"8;5;234",
"8;5;235",
"8;5;236",
"8;5;237",
"8;5;238",
"8;5;239",
"8;5;240",
"8;5;241",
"8;5;242",
"8;5;243",
"8;5;244",
"8;5;245",
"8;5;246",
"8;5;247",
"8;5;248",
"8;5;249",
"8;5;250",
"8;5;251",
"8;5;252",
"8;5;253",
"8;5;254",
"8;5;255",
"7",
"8;5;16",
"8;5;17",
"8;5;18",
"8;5;19",
"8;5;20",
"8;5;21",
"8;5;22",
"8;5;23",
};
int prev_color1=0;
int prev_color2=0;
char scanline[80];
#ifdef __linux__
/* Desktop build: no RISC-V counters available, both readers return 0. */
uint64_t my_rdcycle() {
    return 0;
}
uint64_t my_rdinstret() {
    return 0;
}
#else
/*
 * Returns the 64-bit cycle counter. The two 32-bit halves cannot be read
 * atomically, so the high half is read before and after the low half and
 * the whole read is retried when the high half changed in between (the low
 * half wrapped around).
 */
uint64_t my_rdcycle() {
    uint32_t lo, hi, hi_check;
    do {
        __asm__ __volatile__ ("rdcycleh %0" : "=r" (hi));
        __asm__ __volatile__ ("rdcycle %0"  : "=r" (lo));
        __asm__ __volatile__ ("rdcycleh %0" : "=r" (hi_check));
    } while(hi_check != hi);
    return ((uint64_t)hi << 32) | lo;
}
/*
 * Returns the 64-bit retired-instruction counter, read with the same
 * high/low/high retry scheme as my_rdcycle().
 */
uint64_t my_rdinstret() {
    uint32_t lo, hi, hi_check;
    do {
        __asm__ __volatile__ ("rdinstreth %0" : "=r" (hi));
        __asm__ __volatile__ ("rdinstret %0"  : "=r" (lo));
        __asm__ __volatile__ ("rdinstreth %0" : "=r" (hi_check));
    } while(hi_check != hi);
    return ((uint64_t)hi << 32) | lo;
}
#endif
uint64_t stats_cycles_init = 0;
uint64_t stats_instructions_init = 0;
uint64_t stats_cycles = 0;
uint64_t stats_instructions = 0;
int stats_CPI_times_1000 = 0;
// Starts a measurement: records the current cycle and instruction counters
// as the reference point used by stats_end().
void stats_start() {
stats_cycles_init = my_rdcycle();
stats_instructions_init = my_rdinstret();
}
// Ends a measurement: stores the cycles and instructions elapsed since
// stats_start() and derives CPI * 1000 from them. Both counts are forced to
// be at least 1 so that later divisions by them are safe.
void stats_end() {
    const uint64_t cycles_now = my_rdcycle();
    const uint64_t instret_now = my_rdinstret();
    stats_cycles = cycles_now - stats_cycles_init;
    stats_instructions = instret_now - stats_instructions_init;
    if(!stats_cycles) {
        stats_cycles = 1;
    }
    if(!stats_instructions) {
        stats_instructions = 1;
    }
    stats_CPI_times_1000 = (int)((stats_cycles * 1000)/stats_instructions);
}
// Print "fixed point" number (integer/1000)
// Prints kx/1000 as a decimal number with exactly three fractional digits.
// Zero padding of the fraction is done by hand, one '0' at a time.
static void printk(uint64_t kx) {
    const int whole  = (int)(kx / 1000);
    const int millis = (int)(kx % 1000);
    printf("%d.",whole);
    // one leading zero below 100, a second one below 10
    for(int bound = 100; bound >= 10 && millis < bound; bound /= 10) {
        printf("0");
    }
    printf("%d",millis);
}
// Sets both terminal colours in one escape sequence: colormap[bg] completes
// the "4x" (background) code, colormap[fg] the "3x" (foreground) code.
// fg and bg are indices into colormap[].
static inline void setcolors(int fg, int bg) {
printf("\033[4%s;3%sm",colormap[bg],colormap[fg]);
}
static inline void setpixel(int x, int y, int color) {
if(y&1){
int color1 = scanline[x];
int color2 = color;
if(color1 == color2) {
if(prev_color1 == color1) {
putchar(' ');
} else {
printf("\033[4%sm ",colormap[color1]);
prev_color1 = color1;
}
} else {
if(prev_color1 != color1 && prev_color2 != color2) {
printf("\033[4%s;3%sm",colormap[color1],colormap[color2]);
prev_color1 = color1;
prev_color2 = color2;
} else if(prev_color1 != color1) {
printf("\033[4%sm",colormap[color1]);
prev_color1 = color1;
} else if(prev_color2 != color2) {
printf("\033[3%sm",colormap[color2]);
prev_color2 = color2;
}
printf("\u2583");
}
} else {
scanline[x] = color;
}
}
#define debug(...)
//#define debug printf
// torus radii and distance from camera
// these are pretty baked-in to other constants now, so it probably won't work
// if you change them too much.
const int dz = 5, r1 = 1, r2 = 2;
// "Magic circle algorithm"? DDA? I've seen this formulation in a few places;
// first in Hal Chamberlain's Musical Applications of Microprocessors, but not
// sure what to call it, or how to justify it theoretically. It seems to
// correctly rotate around a point "near" the origin, without losing magnitude
// over long periods of time, as long as there are enough bits of precision in x
// and y. I use 14 bits here.
#define R(s,x,y) x-=(y>>s); y+=(x>>s)
// CORDIC algorithm to find magnitude of |x,y| by rotating the x,y vector onto
// the x axis. This also brings vector (x2,y2) along for the ride, and writes
// back to x2 -- this is used to rotate the lighting vector from the normal of
// the torus surface towards the camera, and thus determine the lighting amount.
// We only need to keep one of the two lighting normal coordinates.
// Returns an approximation of the length of (x,y), obtained by rotating the
// vector onto the x axis with NIT CORDIC shift-and-add steps (10 when
// PRECISE is defined, 5 otherwise). The vector (*x2_, y2) undergoes the same
// rotations; its resulting x component is written back through x2_.
int length_cordic(int16_t x, int16_t y, int16_t *x2_, int16_t y2) {
#ifdef PRECISE
#define NIT 10
#else
#define NIT 5
#endif
    int carried = *x2_;
    // mirror into the right half-plane first
    if (x < 0) {
        x = -x;
        carried = -carried;
    }
    for (int shift = 0; shift < NIT; ++shift) {
        const int x_before = x;
        const int carried_before = carried;
        if (y >= 0) {
            // vector is above the axis (or on it): rotate one way
            x += y >> shift;
            y -= x_before >> shift;
            carried += y2 >> shift;
            y2 -= carried_before >> shift;
        } else {
            // vector is below the axis: rotate the other way
            x -= y >> shift;
            y += x_before >> shift;
            carried -= y2 >> shift;
            y2 += carried_before >> shift;
        }
    }
    // scale by 0.625 (1/2 + 1/8), a cheap stand-in for the 0.607 factor
    // introduced by the rotations (see https://en.wikipedia.org/wiki/CORDIC)
    *x2_ = (carried >> 1) + (carried >> 3);
    int magnitude = (x >> 1) + (x >> 3);
#ifdef PRECISE
    magnitude -= (x >> 6); // get nearer to 0.607 [Inigo Quilez]
#endif
    return magnitude;
}
// Renders the rotating torus forever with ANSI colours (46 pixel rows packed
// two per text row, 79 columns) and prints a status line with FPS, CPI and
// MIPS figures. The first START_FRAMES frames are computed without display;
// the CPI measured over them is kept as the "accurate" value.
int main() {
    printf( "\033[48;5;16m" // set background color black
            "\033[38;5;15m" // set foreground color white
            "\033[H" // home
            "\033[?25l" // hide cursor
            "\033[2J"); // clear screen
    int frame = 0;
    // high-precision rotation directions, sines and cosines and their products
    int16_t sB = 0, cB = 16384;
    int16_t sA = 11583, cA = 11583;
    int16_t sAsB = 0, cAsB = 0;
    int16_t sAcB = 11583, cAcB = 11583;
    // These two are only measured once frame == START_FRAMES but are printed
    // on every frame, so they need a defined value before that.
    int accurate_CPI_x_1000 = 0;
    int accurate_MIPS_x_1000 = 0;
    int CPI_x_1000;
    stats_start();
    for (;;) {
        int display_on = (frame > START_FRAMES);
        // With display on, every frame is timed on its own; before that the
        // measurement started above keeps running across frames.
        if(display_on) {
            stats_start();
        }
        // yes this is a multiply but dz is 5 so it's (sb + (sb<<2)) >> 6 effectively
        int p0x = dz * sB >> 6;
        int p0y = dz * sAcB >> 6;
        int p0z = -dz * cAcB >> 6;
        const int r1i = r1*256;
        const int r2i = r2*256;
        int niters = 0;
        int nnormals = 0;
        int16_t yincC = (cA >> 6) + (cA >> 5); // 12*cA >> 8;
        int16_t yincS = (sA >> 6) + (sA >> 5); // 12*sA >> 8;
        int16_t xincX = (cB >> 7) + (cB >> 6); // 6*cB >> 8;
        int16_t xincY = (sAsB >> 7) + (sAsB >> 6); // 6*sAsB >> 8;
        int16_t xincZ = (cAsB >> 7) + (cAsB >> 6); // 6*cAsB >> 8;
        int16_t ycA = -((cA >> 1) + (cA >> 4)); // -12 * yinc1 = -9*cA >> 4;
        int16_t ysA = -((sA >> 1) + (sA >> 4)); // -12 * yinc2 = -9*sA >> 4;
        //int dmin = INT_MAX, dmax = -INT_MAX;
        int xsAsB = (sAsB >> 4) - sAsB; // -40*xincY
        int xcAsB = (cAsB >> 4) - cAsB; // -40*xincZ;
        // one iteration per pixel row (half increments: twice as many rows
        // as text lines)
        for (int j = 0; j < 46; j++, ycA += yincC>>1, ysA += yincS>>1) {
            int16_t vxi14 = (cB >> 4) - cB - sB; // -40*xincX - sB;
            int16_t vyi14 = ycA - xsAsB - sAcB;
            int16_t vzi14 = ysA + xcAsB + cAcB;
            for (int i = 0; i < 79; i++, vxi14 += xincX, vyi14 -= xincY, vzi14 += xincZ) {
                int t = 512; // (256 * dz) - r2i - r1i;
                int16_t px = p0x + (vxi14 >> 5); // assuming t = 512, t*vxi>>8 == vxi<<1
                int16_t py = p0y + (vyi14 >> 5);
                int16_t pz = p0z + (vzi14 >> 5);
                debug("pxyz (%+4d,%+4d,%+4d)\n", px, py, pz);
                int16_t lx0 = sB >> 2;
                int16_t ly0 = sAcB - cA >> 2;
                int16_t lz0 = -cAcB - sA >> 2;
                // march along the ray: d is the distance estimate at the
                // current point, t the distance travelled so far
                for (;;) {
                    int t0, t1, t2, d;
                    int16_t lx = lx0, ly = ly0, lz = lz0;
                    debug("[%2d,%2d] (px, py) = (%d, %d), (lx, ly) = (%d, %d) -> ", j, i, px, py, lx, ly);
                    t0 = length_cordic(px, py, &lx, ly);
                    debug("t0=%d (lx', ly') = (%d, %d)\n", t0, lx, ly);
                    t1 = t0 - r2i;
                    t2 = length_cordic(pz, t1, &lz, lx);
                    d = t2 - r1i;
                    t += d;
                    if (t > 8*256) {
                        // background: 8x8 checkerboard that scrolls with
                        // the frame number (colormap entries 26 and 30)
                        // putchar(' ');
                        int N = (((j-frame)>>3)^(((i+frame)>>3)))&1;
                        if(display_on) setpixel(i,j,(N<<2)+26);
                        break;
                    } else if (d < 2) {
                        // surface hit: shade clamped to entries 0..25
                        int N = lz >> 8;
                        // putchar(".,-~:;!*=#$@"[N > 0 ? N < 12 ? N : 11 : 0]);
                        N = N > 0 ? N < 26 ? N : 25 : 0;
                        if(display_on) setpixel(i,j,N);
                        nnormals++;
                        break;
                    }
                    // todo: shift and add version of this
                    /*
                      if (d < dmin) dmin = d;
                      if (d > dmax) dmax = d;
                    */
#ifdef USE_MUL
                    px += d*vxi14 >> 14;
                    py += d*vyi14 >> 14;
                    pz += d*vzi14 >> 14;
#else
                    {
                        // 11x1.14 fixed point 3x parallel multiply
                        // only 16 bit registers needed; starts from highest bit to lowest
                        // d is about 2..1100, so 11 bits are sufficient
                        int16_t dx = 0, dy = 0, dz = 0;
                        int16_t a = vxi14, b = vyi14, c = vzi14;
                        while (d) {
                            if (d&1024) {
                                dx += a;
                                dy += b;
                                dz += c;
                            }
                            d = (d&1023) << 1;
                            a >>= 1;
                            b >>= 1;
                            c >>= 1;
                        }
                        // we already shifted down 10 bits, so get the last four
                        px += dx >> 4;
                        py += dy >> 4;
                        pz += dz >> 4;
                    }
#endif
                    niters++;
                }
            }
            // a text line is complete after every second pixel row
            if(display_on && (j&1)) puts("");
        }
        if(display_on) printf("\033[0m"); // reset colors
        stats_end();
        if(frame == START_FRAMES) {
            accurate_CPI_x_1000 = stats_CPI_times_1000;
            accurate_MIPS_x_1000 = (MHZ * 1000000) / accurate_CPI_x_1000;
        }
        CPI_x_1000 = stats_CPI_times_1000;
        uint64_t FPS_num = (uint64_t)(MHZ) * 1000000 * 1000;
        uint64_t FPS_denom = stats_cycles;
        int FPSx1000 = (int)(FPS_num / FPS_denom);
        setcolors(25,33);
#ifdef USE_MUL
        printf("%s RV32IM %dMHz ", CPU_NAME, MHZ);
#else
        printf("%s RV32I %dMHz ", CPU_NAME, MHZ);
#endif
        setcolors(25,0);
        printf(" "); printk(FPSx1000); printf(" FPS ");
        setcolors(0,25);
        printf(" "); printk(CPI_x_1000);
        printf(" ("); printk(accurate_CPI_x_1000); printf(") CPI ");
        setcolors(25,0);
        printf(" "); printk(accurate_MIPS_x_1000); printf(" MIPS");
        /*
          setcolors(0,25);
          printf(" %d iterations ", niters);
          setcolors(0,25);
          printf(" %d lit pixels ", nnormals);
        */
        setcolors(25,0);
        printf("\x1b[K");
#ifdef __linux__
        fflush(stdout);
#endif
        // rotate sines, cosines, and products thereof
        // this animates the torus rotation about two axes
        R(5, cA, sA);
        R(5, cAsB, sAsB);
        R(5, cAcB, sAcB);
        R(6, cB, sB);
        R(6, cAcB, cAsB);
        R(6, sAcB, sAsB);
#ifdef __linux__
        usleep(15000);
#endif
        // back to the first row: carriage return, then cursor up 23 lines
        printf("\r\x1b[23A");
        ++frame;
        // force setpixel() to resend both colours on the next frame
        prev_color1=-1;
        prev_color2=-1;
    }
    return 0;
}

11
FIRMWARE/errno.c Normal file
View File

@@ -0,0 +1,11 @@
// Sometimes __errno is not linked, here is a dummy replacement.
// Note that __errno is a function that returns a pointer to the
// actual __errno (this is for multithreading). Made me bang my
// head to the wall (and made tinyraytracer crash because powf()
// was *calling* __errno).
// Returns the address of a single, statically allocated error cell (the
// same address on every call); the cell starts out as 0.
int* __errno() {
    static int dummy_errno;
    return &dummy_errno;
}

27
FIRMWARE/hello.S Normal file
View File

@@ -0,0 +1,27 @@
# Hello world !
# Prints the `hello` string over and over.
.section .text
.globl main
# main: endless loop, never returns (so ra is not saved).
main:
.L0:
la a0, hello # a0 <- address of the zero-terminated string
call putstring
j .L0
# putstring: prints a zero-terminated string, one putchar call per character.
#   In:  a0 = address of the string
#   Out: nothing
# The string pointer has to survive the calls to putchar, so it is kept in
# s0 (callee-saved) rather than in a temporary register that putchar is
# allowed to overwrite. ra and the caller's s0 are saved in a 16-byte frame,
# which also keeps sp 16-byte aligned as the standard calling convention
# requires.
putstring:
addi sp,sp,-16 # allocate the frame
sw ra,12(sp) # save ra (needed: this function calls another function)
sw s0,8(sp) # save the caller's s0
mv s0,a0 # s0 <- current position in the string
.L1: lbu a0,0(s0) # a0 <- next character
beqz a0,.L2 # terminating zero: done
call putchar
addi s0,s0,1
j .L1
.L2: lw ra,12(sp) # restore ra
lw s0,8(sp) # restore the caller's s0
addi sp,sp,16 # release the frame
ret
.section .data
hello:
.asciz "Hello, world !\n"

113
FIRMWARE/humanshader.c Normal file
View File

@@ -0,0 +1,113 @@
// C version of humanshader
// See https://humanshader.com/
// (using a computer is clearly not as fun, but it is interesting to have
// a small not too computationally expensive raytracing program that
// can run on small softcores for FPGAs).
// Using the 16-bits version with no divide from here: https://www.shadertoy.com/view/XflXDs
#define GL_width 71
#define GL_height 40
#include "GL_tty.h"
// Computes the colour of pixel (x,y) of the "human shader" picture using
// integer arithmetic only.
//   x, y:                 pixel coordinates
//   r_out, g_out, b_out:  receive the red, green and blue components
// Red and blue are computed by one of three branches (sphere, ground, sky),
// capped at 255, and green is derived from them.
void human_shader(
int x, int y, uint8_t* r_out, uint8_t* g_out, uint8_t* b_out
) {
int R, B;
//-------------------------
// Section A (2 MUL, 3 ADD)
//-------------------------
// (u,v): coordinates relative to pixel (36,18), v pointing up;
// h: squared distance to that pixel
int u = x-36;
int v = 18-y;
int u2 = u*u;
int v2 = v*v;
int h = u2 + v2;
//-------------------------
if( h < 200 )
{
//-------------------------------------
// Section B, Sphere (4/7 MUL, 5/9 ADD)
//-------------------------------------
R = 420;
B = 520;
int t = 5200 + (h<<3);
int p = (t*u)>>7;
int q = (t*v)>>7;
// bounce light
int w = 18 + (((p*5-q*13))>>9);
if( w>0 ) R += w*w;
// sky light / ambient occlusion
int o = q + 900;
R = (R*o)>>12;
B = (B*o)>>12;
// sun/key light
if( p > -q )
{
int w = (p+q)>>3;
R += w;
B += w;
}
//-------------------------
}
else if( v<0 )
{
//-------------------------------------
// Section C, Ground (5/9 MUL, 6/9 ADD)
//-------------------------------------
R = 150 + (v<<1);
B = 50;
int p = h + (v2<<3);
int c = 240*(-v) - p;
// sky light / ambient occlusion
if( c>1200 )
{
int o = (25*c)>>3;
o = (c*(7840-o)>>9) - 8560;
R = (R*o)>>10;
B = (B*o)>>10;
}
// sun/key light with soft shadow
int r = c + u*v;
int d = 3200 - h - (r<<1);
if( d>0 ) R += d;
//-------------------------
}
else
{
//------------------------------
// Section D, Sky (1 MUL, 2 ADD)
//------------------------------
int c = x + (y<<2);
R = 132 + c;
B = 192 + c;
//-------------------------
}
//-------------------------
// Section E (3 MUL, 1 ADD)
//-------------------------
// NOTE(review): only the upper bound is clamped; a negative R or B would
// wrap around in the uint8_t casts below - confirm it cannot occur.
if(R > 255) R = 255;
if(B > 255) B = 255;
int G = (R*11 + 5*B)>>4;
//-------------------------
*r_out = (uint8_t)R;
*g_out = (uint8_t)G;
*b_out = (uint8_t)B;
}
// Renders the picture once: human_shader is handed to GL_scan_RGB together
// with the picture size, between GL_init() and GL_terminate().
// NOTE(review): the GL_* routines come from GL_tty.h, which is not visible
// here; presumably GL_scan_RGB calls the shader for every pixel - confirm.
int main() {
GL_init();
GL_scan_RGB(GL_width, GL_height, human_shader);
GL_terminate();
return 0;
}

10
FIRMWARE/io.h Normal file
View File

@@ -0,0 +1,10 @@
#include <stdint.h>
/* Base address of the memory-mapped IO registers. */
#define IO_BASE 0x400000
/* Register offsets (in bytes) from IO_BASE. */
#define IO_LEDS 4
#define IO_UART_DAT 8
#define IO_UART_CNTL 16
/*
 * 32-bit volatile access to the IO register at offset `port`.
 * IO_IN(port) yields the register value, IO_OUT(port,val) stores val.
 * Arguments and the whole expansion are parenthesised so that the macros
 * can be used with arbitrary expressions and inside larger expressions.
 */
#define IO_IN(port) (*(volatile uint32_t*)(IO_BASE + (port)))
#define IO_OUT(port,val) (*(volatile uint32_t*)(IO_BASE + (port))=(val))

99
FIRMWARE/mandel_C.c Normal file
View File

@@ -0,0 +1,99 @@
/*
Computes and displays the Mandelbrot set on a text terminal, using ANSI 24-bit background-colour escape sequences.
*/
#include <stdio.h>
#ifdef __linux__
#include <unistd.h>
#else
#include "io.h"
#endif
#define W 46
#define H 46
#define mandel_shift 10
#define mandel_mul (1 << mandel_shift)
#define xmin -2*mandel_mul
#define ymax 2*mandel_mul
#define ymin -2*mandel_mul
#define xmax 2*mandel_mul
#define dx (xmax-xmin)/H
#define dy (ymax-ymin)/H
#define norm_max (4 << mandel_shift)
#define ANSIRGB(R,G,B) "\033[48;2;" #R ";" #G ";" #B "m "
const char* colormap[21] = {
ANSIRGB( 0, 0, 0),
ANSIRGB( 0, 0, 40),
ANSIRGB( 0, 0, 80),
ANSIRGB( 0, 0,120),
ANSIRGB( 0, 0,160),
ANSIRGB( 0, 0,200),
ANSIRGB( 0, 0,240),
ANSIRGB( 0, 0, 0),
ANSIRGB( 0, 40, 0),
ANSIRGB( 0, 80, 0),
ANSIRGB( 0,120, 0),
ANSIRGB( 0,160, 0),
ANSIRGB( 0,200, 0),
ANSIRGB( 0,240, 0),
ANSIRGB( 0, 0, 0),
ANSIRGB( 40, 0, 0),
ANSIRGB( 80, 0, 0),
ANSIRGB( 120, 0, 0),
ANSIRGB( 160, 0, 0),
ANSIRGB( 200, 0, 0),
ANSIRGB( 240, 0, 0)
};
/*
 * Draws the Mandelbrot set forever, W x H character cells per frame, in
 * fixed point with mandel_shift fractional bits. The colour of a cell is
 * the remaining iteration count plus the frame number, modulo the 21
 * colormap entries, which makes the colours cycle from frame to frame.
 * Never returns.
 */
int main() {
    int frame=0;
    for(;;) {
#ifndef __linux__
        /* io.h is only included for the device build, so the LED
           register can only be driven there */
        IO_OUT(IO_LEDS,frame);
#endif
        int last_color = -1;
        printf("\033[H");           /* cursor home: redraw in place */
        int Ci = ymin;
        for(int Y=0; Y<H; ++Y) {
            int Cr = xmin;
            for(int X=0; X<W; ++X) {
                int Zr = Cr;
                int Zi = Ci;
                int iter = 20;
                /* Z <- Z*Z + C until |Z|^2 exceeds norm_max or the
                   iteration budget is used up */
                while(iter > 0) {
                    int Zrr = (Zr * Zr) >> mandel_shift;
                    int Zii = (Zi * Zi) >> mandel_shift;
                    int Zri = (Zr * Zi) >> (mandel_shift - 1);  /* 2*Zr*Zi */
                    Zr = Zrr - Zii + Cr;
                    Zi = Zri + Ci;
                    if(Zrr + Zii > norm_max) {
                        break;
                    }
                    --iter;
                }
                int color = (iter+frame)%21;
                /* same colour as the previous cell: a plain space is
                   enough, the escape sequence need not be sent again */
                printf(color == last_color ? " " : colormap[color]);
                last_color = color;
                Cr += dx;
            }
            Ci += dy;
            printf("\033[49m\n");   /* default background, next line */
            last_color = -1;
        }
        ++frame;
#ifdef __linux__
        usleep(100000);
#endif
        // if(frame>4) break;
    }
}

125
FIRMWARE/mandelbrot.S Normal file
View File

@@ -0,0 +1,125 @@
# Computes and displays the Mandelbrot set on the terminal.
# Needs NRV_IO_UART to be enabled.
#
# To access it, use:
# miniterm.py --dtr=0 /dev/ttyUSB1 115200
# or screen /dev/ttyUSB1 115200 (<ctrl> a \ to exit)
# Base address of memory-mapped IO,
# Loaded into gp at startup
.equ IO_BASE, 0x400000
# IO-reg offsets. To read or write one of them,
# use IO_XXX(gp)
.equ IO_LEDS, 4
.equ IO_UART_DAT, 8
.equ IO_UART_CNTL, 16
.equ mandel_shift, 10
.equ mandel_mul,(1 << mandel_shift)
.equ xmin, -2*mandel_mul
.equ xmax, 2*mandel_mul
.equ ymin, -2*mandel_mul
.equ ymax, 2*mandel_mul
.equ dx, (xmax-xmin)/80
.equ dy, (ymax-ymin)/80
.equ norm_max,(4 << mandel_shift)
.section .text
# Register allocation:
# X,Y : s0,s1
# Cr,Ci : s2,s3
# Zr,Zi : s4,s5
# Zrr,2Zri,Zii: s6,s7,s8
# cnt: s10
# 80 (number of rows and of columns): s11
#
# All of these are s-registers, so they are expected to survive the calls to
# wait, __mulsi3 and putchar (defined elsewhere).
# main never returns: it loops back to mandelstart after each picture.
.globl main
main:
mandelstart:
# LED animation before the picture
li t0, 5
sw t0, IO_LEDS(gp)
call wait
li t0, 10
sw t0, IO_LEDS(gp)
call wait
# NOTE(review): this 5 is overwritten by 10 right away, without a wait.
li t0, 5
sw t0, IO_LEDS(gp)
li t0, 10
sw t0, IO_LEDS(gp)
call wait
li t0, 0
sw t0, IO_LEDS(gp)
li s1,0 # Y <- 0
# Ci is initialised with xmin and Cr (below) with ymin; both constants have
# the same value.
li s3,xmin
li s11,80
loop_y: li s0,0 # X <- 0
li s2,ymin
loop_x: mv s4,s2 # Z <- C
mv s5,s3
li s10,9 # iter <- 9
loop_Z: mv a0,s4 # Zrr <- (Zr*Zr) >> mandel_shift
mv a1,s4
call __mulsi3
srli s6,a0,mandel_shift
mv a0,s4 # Zri <- (Zr*Zi) >> (mandel_shift-1)
mv a1,s5
call __mulsi3
srai s7,a0,mandel_shift-1
mv a0,s5 # Zii <- (Zi*Zi) >> (mandel_shift)
mv a1,s5
call __mulsi3
srli s8,a0,mandel_shift
sub s4,s6,s8 # Zr <- Zrr - Zii + Cr
add s4,s4,s2
add s5,s7,s3 # Zi <- 2Zri + Ci
add s6,s6,s8 # if norm > norm max, exit loop
li s7,norm_max
bgt s6,s7,exit_Z
add s10,s10,-1 # iter--, loop if non-zero
bnez s10, loop_Z
exit_Z:
# print colormap[iter]: the remaining iteration count (0..9) selects one of
# the ten characters
la a0,colormap
add a0,a0,s10
lbu a0,0(a0)
call putchar
add s0,s0,1 # next column
add s2,s2,dx
bne s0,s11,loop_x
li a0,13 # end of row: CR, LF
call putchar
li a0,10
call putchar
add s1,s1,1 # next row
add s3,s3,dy
bne s1,s11,loop_y
li t0, 15
sw t0, IO_LEDS(gp)
# NOTE(review): a0 is not loaded before this call, so the character printed
# here is whatever the previous putchar left in a0 - confirm it is intended.
call putchar
li a0,13
call putchar
li a0,10
call putchar
j mandelstart
.section .data
# one character per remaining-iteration count, 0..9 (no terminating zero)
colormap:
.ascii " .,:;ox%#@"

27
FIRMWARE/memcpy.c Normal file
View File

@@ -0,0 +1,27 @@
#include <stddef.h>
#include <stdint.h>
// Keeps GCC from recognising the copy loops below as a memcpy pattern and
// replacing them with a call to memcpy(), i.e. to this very function.
#pragma GCC optimize ("no-tree-loop-distribute-patterns")
/*
 * Copies len bytes from src to dst and returns dst.
 * When both pointers are 4-byte aligned, the bulk of the data is copied one
 * 32-bit word at a time; the remaining bytes (or everything, in the
 * unaligned case) are copied byte by byte.
 * As with the standard memcpy, the two areas must not overlap.
 */
void* memcpy(void * dst, void const * src, size_t len) {
    uint32_t * plDst = (uint32_t *) dst;
    uint32_t const * plSrc = (uint32_t const *) src;

    // If source and destination are aligned,
    // copy 32 bits by 32 bits.
    // (uintptr_t is the integer type meant to hold a pointer, whatever the
    // pointer width of the target)
    if (!(((uintptr_t)src | (uintptr_t)dst) & 3)) {
        while (len >= 4) {
            *plDst++ = *plSrc++;
            len -= 4;
        }
    }

    // Tail (or whole buffer when unaligned): byte by byte, continuing from
    // wherever the word loop stopped.
    uint8_t* pcDst = (uint8_t *) plDst;
    uint8_t const* pcSrc = (uint8_t const *) plSrc;

    while (len--) {
        *pcDst++ = *pcSrc++;
    }

    return dst;
}

26
FIRMWARE/notes.txt Normal file
View File

@@ -0,0 +1,26 @@
# https://blog.thea.codes/the-most-thoroughly-commented-linker-script/
# https://interrupt.memfault.com/blog/how-to-write-linker-scripts-for-firmware
bin/riscv64-unknown-elf-as -march=rv32i -mabi=ilp32 -mno-relax mandelbrot_terminal.S -o mandelbrot_terminal.o
riscv64-unknown-elf-ld mandelbrot_terminal.o -T baremetal.ld -m elf32lriscv -nostdlib -norelax
/home/blevy/Programming/learn-fpga/FemtoRV/FIRMWARE/TOOLS/firmware_words a.elf -ram 6144 -hex a.hex
FTDI 2232H
Rx >
Tx <
RTSn > Request to send
CTSn < Clear to send
DTRn > Data Terminal Ready
DSRn < Data Set Ready
DCDn > Data Carrier Detect
#set_io DCDn 1
#set_io DSRn 2
#set_io DTRn 3
#set_io CTSn 4
#set_io RTSn 7
set_io RS232_Tx_TTL 8
set_io RS232_Rx_TTL 9

19
FIRMWARE/perf.S Normal file
View File

@@ -0,0 +1,19 @@
# 64-bit reads of the cycle and retired-instruction counters.
# Both routines return the low half in a0 and the high half in a1, and
# use t0 as scratch.
.section .text
.globl rdcycle
.globl rdinstret
# rdcycle: a1:a0 <- cycle counter.
# The high half is read before and after the low half; if it changed in
# between (the low half wrapped around), the read is retried.
rdcycle:
.L0:
rdcycleh a1
rdcycle a0
rdcycleh t0
bne a1,t0,.L0
ret
# rdinstret: a1:a0 <- retired-instruction counter, same retry scheme.
rdinstret:
.L1:
rdinstreth a1
rdinstret a0
rdinstreth t0
bne a1,t0,.L1
ret

4
FIRMWARE/perf.h Normal file
View File

@@ -0,0 +1,4 @@
#include <stdint.h>
/*
 * 64-bit counter readers. Declared with (void) so that the compiler checks
 * that callers pass no argument.
 */
/* Returns the number of clock cycles counted so far. */
extern uint64_t rdcycle(void);
/* Returns the number of instructions retired so far. */
extern uint64_t rdinstret(void);

186
FIRMWARE/pi.c Normal file
View File

@@ -0,0 +1,186 @@
/*
* Computation of the n'th decimal digit of \pi with very little memory.
* Written by Fabrice Bellard on January 8, 1997.
*
* We use a slightly modified version of the method described by Simon
* Plouffe in "On the Computation of the n'th decimal digit of various
* transcendental numbers" (November 1996). We have modified the algorithm
* to get a running time of O(n^2) instead of O(n^3log(n)^3).
*
* This program uses mostly integer arithmetic. It may be slow on some
* hardwares where integer multiplications and divisons must be done
* by software. We have supposed that 'int' has a size of 32 bits. If
* your compiler supports 'long long' integers of 64 bits, you may use
* the integer version of 'mul_mod' (see HAS_LONG_LONG).
*/
/* Adapted to FemtoRV32 (Bruno Levy Feb. 2021) */
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
// #include "errno_fix.h"
//#define RV32_FASTCODE __attribute((section(".fastcode")))
#define RV32_FASTCODE
/* comment out the following line if 'long long' (64-bit) integers are not
 * available; mul_mod then falls back to double-precision fmod() */
#define HAS_LONG_LONG
/*
 * mul_mod(a,b,m): (a * b) mod m, with the product computed in a type wider
 * than 'int' so that it does not overflow. Every argument is parenthesised
 * so that expressions can be passed safely.
 */
#ifdef HAS_LONG_LONG
#define mul_mod(a,b,m) (( (long long) (a) * (long long) (b) ) % (m))
#else
#define mul_mod(a,b,m) fmod( (double) (a) * (double) (b), (m))
#endif
/* return the inverse of x mod y */
int inv_mod(int x, int y) RV32_FASTCODE;
/*
 * Modular inverse of x modulo y, computed with the extended Euclidean
 * algorithm. The result is normalised to the range [0, y).
 * x must be non-zero (the first step divides by it).
 */
int inv_mod(int x, int y)
{
    int rem = x;        /* current remainder */
    int prev_rem = y;   /* previous remainder */
    int coef = 1;       /* Bezout coefficient paired with rem */
    int prev_coef = 0;  /* Bezout coefficient paired with prev_rem */

    do {
        const int quot = prev_rem / rem;
        const int next_coef = prev_coef - quot * coef;
        const int next_rem = prev_rem - quot * rem;

        prev_coef = coef;
        coef = next_coef;
        prev_rem = rem;
        rem = next_rem;
    } while (rem != 0);

    /* '%' may yield a negative value in C: bring it back into [0, y) */
    prev_coef %= y;
    return (prev_coef < 0) ? (y + prev_coef) : prev_coef;
}
/* return (a^b) mod m */
int pow_mod(int a, int b, int m) RV32_FASTCODE;
/*
 * Modular exponentiation by repeated squaring: scans the bits of b from the
 * least significant one, multiplying the result by the current power of a
 * whenever the bit is set. All products go through mul_mod().
 */
int pow_mod(int a, int b, int m)
{
    int result = 1;
    int power = a;   /* a^(2^i) mod m for the bit currently examined */

    for (;;) {
        if (b & 1)
            result = mul_mod(result, power, m);
        b >>= 1;
        if (b == 0)
            return result;
        power = mul_mod(power, power, m);
    }
}
/* return true if n is prime */
int is_prime(int n) RV32_FASTCODE;
/*
 * Trial-division primality test.
 * Returns 1 if n is prime, 0 otherwise (including n < 2).
 * The candidate divisor is capped at floor(sqrt(INT_MAX)) for 32-bit 'int'
 * so that i*i cannot overflow for n close to INT_MAX.
 */
int is_prime(int n)
{
    const int max_root = 46340; /* 46340^2 <= 2^31-1 < 46341^2 */
    int i;
    if (n < 2)
        return 0;
    if ((n % 2) == 0)
        return n == 2;          /* 2 is the only even prime */
    for (i = 3; i <= max_root && i * i <= n; i += 2)
        if ((n % i) == 0)
            return 0;
    return 1;
}
/* return the prime number immediately after n */
int next_prime(int n) RV32_FASTCODE;
/* Smallest value strictly greater than n for which is_prime() is true. */
int next_prime(int n)
{
    for (n = n + 1; !is_prime(n); n++) {
        /* keep stepping until a prime is found */
    }
    return n;
}
int digits(int n) RV32_FASTCODE;
/*
 * Core of the digit-extraction algorithm described in the file header.
 * Accumulates a fractional sum (kept modulo 1.0) with one contribution per
 * odd prime a <= 2*N, and returns that fraction scaled by 1e9, i.e. an
 * integer of at most 9 decimal digits. main() prints consecutive results
 * for n = 1, 10, 19, ... as successive groups of decimals of pi.
 * All modular products go through mul_mod(); inverses and powers through
 * inv_mod() and pow_mod().
 */
int digits(int n) {
int av, a, vmax, N, num, den, k, kq, kq2, t, v, s, i;
double sum;
/* N: number of terms used for position n, (n+20)*log2(10) */
N = (int) ((n + 20) * log(10) / log(2));
sum = 0;
for (a = 3; a <= (2 * N); a = next_prime(a)) {
/* vmax: largest exponent with a^vmax <= 2*N; av = a^vmax is the modulus */
vmax = (int) (log(2 * N) / log(a));
av = 1;
for (i = 0; i < vmax; i++)
av = av * a;
s = 0;
num = 1;
den = 1;
/* v: net power of a removed so far (decremented for k, incremented for 2k-1) */
v = 0;
/* kq counts up to a, so kq >= a exactly when k is a multiple of a */
kq = 1;
/* kq2 follows 2k-1 reduced by a; kq2 == a exactly when a divides 2k-1 */
kq2 = 1;
for (k = 1; k <= N; k++) {
t = k;
if (kq >= a) {
/* strip every factor a from k, recording each in v */
do {
t = t / a;
v--;
} while ((t % a) == 0);
kq = 0;
}
kq++;
num = mul_mod(num, t, av);
t = (2 * k - 1);
if (kq2 >= a) {
if (kq2 == a) {
/* strip every factor a from 2k-1, recording each in v */
do {
t = t / a;
v++;
} while ((t % a) == 0);
}
kq2 -= a;
}
den = mul_mod(den, t, av);
kq2 += 2;
if (v > 0) {
/* term = k * num / den * a^(vmax - v) (mod av), added into s */
t = inv_mod(den, av);
t = mul_mod(t, num, av);
t = mul_mod(t, k, av);
for (i = v; i < vmax; i++)
t = mul_mod(t, a, av);
s += t;
if (s >= av)
s -= av;
}
}
/* shift to decimal position n: multiply by 10^(n-1) modulo av */
t = pow_mod(10, n - 1, av);
s = mul_mod(s, t, av);
/* only the fractional part of the running sum is kept */
sum = fmod(sum + (double) s / (double) av, 1.0);
}
return (int) (sum * 1e9);
}
/*
 * Prints the first 45 decimals of pi, 9 digits at a time
 * (n = 1, 10, 19, 28, 37).
 *
 * Each group returned by digits() is an integer below 10^9; it is padded
 * with leading zeros here because "%d" alone would silently drop them
 * (the firmware printf has no width/padding support, so "%09d" is not
 * an option).
 */
void main() {
    printf("\npi = 3.");
    for (int n = 1; ; n += 9) {
        int group = digits(n);
        /* one '0' for every leading decimal position the value does not reach */
        for (int limit = 100000000; limit > 1 && group < limit; limit /= 10) {
            printf("0");
        }
        printf("%d", group);
        if (n > 36) break;
    }
}

29
FIRMWARE/pipeline.ld Normal file
View File

@@ -0,0 +1,29 @@
/*
 * Linker script with two separate memory regions:
 * code goes to PROGROM, every kind of data (initialized, read-only and
 * zero-initialized) goes to DATARAM.
 */
MEMORY {
PROGROM (RX) : ORIGIN = 0x00000, LENGTH = 0x10000 /* 64kB ROM */
DATARAM (RW) : ORIGIN = 0x10000, LENGTH = 0x10000 /* 64kB RAM */
}
SECTIONS {
/* Code. start_pipeline.o is listed first, so its .text is placed at the
 * very beginning of PROGROM. */
.text : {
. = ALIGN(4);
start_pipeline.o (.text)
*(.text*)
} > PROGROM
/* All data sections are gathered in one output section in DATARAM;
 * note that read-only data (.rodata, .srodata) is placed here too,
 * not in PROGROM. */
.data : {
. = ALIGN(4);
*(.data*)
*(.sdata*)
*(.rodata*)
*(.srodata*)
*(.bss*)
*(.sbss*)
*(COMMON)
*(.eh_frame)
*(.eh_frame_hdr)
*(.init_array*)
*(.gcc_except_table*)
} > DATARAM
}

65
FIRMWARE/print.c Normal file
View File

@@ -0,0 +1,65 @@
#include <stdarg.h>
/* Sends every character of the NUL-terminated string s to putchar(). */
void print_string(const char* s) {
    while (*s) {
        putchar(*s++);
    }
}
/* Writes s followed by a newline, one character at a time; always returns 1. */
int puts(const char* s) {
    for (; *s; ++s) {
        putchar(*s);
    }
    putchar('\n');
    return 1;
}
/*
 * Prints val in decimal through putchar(), with a leading '-' if negative.
 *
 * The magnitude is computed in unsigned arithmetic: negating INT_MIN as a
 * signed int overflows (the previous recursive print_dec(-val) could then
 * recurse without end). The digit buffer is sized from the width of the
 * type instead of a fixed 255 bytes, which matters on targets with only a
 * few kB of stack.
 */
void print_dec(int val) {
    char digits[3 * sizeof(unsigned int) + 1]; /* >= max decimal digits */
    char *p = digits;
    unsigned int magnitude = (unsigned int)val;

    if (val < 0) {
        putchar('-');
        magnitude = 0u - magnitude;   /* well-defined, also for INT_MIN */
    }
    /* digits come out least significant first; at least one is produced */
    do {
        *p++ = (char)('0' + (magnitude % 10u));
        magnitude /= 10u;
    } while (magnitude != 0u);
    while (p != digits) {
        putchar(*--p);
    }
}
/* Declared before first use: print_hex() calls it ahead of its definition. */
void print_hex_digits(unsigned int val, int nbdigits);

/* Prints val as 8 uppercase hexadecimal digits. */
void print_hex(unsigned int val) {
    print_hex_digits(val, 8);
}

/*
 * Prints the nbdigits least significant hexadecimal digits of val, most
 * significant first, in uppercase. Nothing is printed when nbdigits <= 0.
 * Digit positions beyond the width of val are printed as '0' instead of
 * shifting by an amount >= the width of the type (undefined in C).
 */
void print_hex_digits(unsigned int val, int nbdigits) {
    static const char hex[] = "0123456789ABCDEF";
    const int val_bits = (int)(8 * sizeof val);
    for (int shift = 4 * (nbdigits - 1); shift >= 0; shift -= 4) {
        putchar(shift < val_bits ? hex[(val >> shift) & 0xFu] : '0');
    }
}
/*
 * Minimal printf: supports %s, %x (8 hex digits), %d and %c, with no width,
 * precision or flags. Any other character following '%' is printed as is
 * (so "%%" prints '%'). Always returns 0.
 *
 * A lone '%' at the very end of the format stops the output: previously the
 * loop stepped over the terminating NUL and kept reading past the string.
 */
int printf(const char *fmt,...)
{
    va_list ap;
    va_start(ap, fmt);
    for (; *fmt; fmt++) {
        if (*fmt != '%') {
            putchar(*fmt);
            continue;
        }
        fmt++;
        if (*fmt == '\0') {
            break;              /* incomplete conversion at end of string */
        }
        switch (*fmt) {
        case 's': print_string(va_arg(ap, char *));      break;
        case 'x': print_hex(va_arg(ap, unsigned int));   break;
        case 'd': print_dec(va_arg(ap, int));            break;
        case 'c': putchar(va_arg(ap, int));              break;
        default:  putchar(*fmt);                         break;
        }
    }
    va_end(ap);
    return 0;
}

22
FIRMWARE/putchar.S Normal file
View File

@@ -0,0 +1,22 @@
# Base address of memory-mapped IO,
# Loaded into gp at startup
.equ IO_BASE, 0x400000
# IO-reg offsets. To read or write one of them,
# use IO_XXX(gp)
.equ IO_LEDS, 4
.equ IO_UART_DAT, 8
.equ IO_UART_CNTL, 16
.section .text
.globl putchar
# int putchar(int c)
# In:    a0 = character to send, gp = base address of memory-mapped IO
# Out:   a0 is left untouched, so the character is also the return value
# Clobb: t0, t1
# Writes the character to the UART data register, then polls the UART
# control register until bit 9 reads as zero.
# NOTE(review): bit 9 is presumably the "transmitter busy" flag -- confirm
# against the UART RTL.
putchar:
sw a0, IO_UART_DAT(gp)
# t0 = mask selecting bit 9 of the control register
li t0, 1<<9
.L0:
lw t1, IO_UART_CNTL(gp)
and t1, t1, t0
# spin while the bit is still set
bnez t1, .L0
ret

518
FIRMWARE/raystones.c Normal file
View File

@@ -0,0 +1,518 @@
/* A port of Dmitry Sokolov's tiny raytracer to C and to FemtoRV32 */
/* Displays on the small OLED display and/or HDMI */
/* Bruno Levy, 2020 */
/* Original tinyraytracer: https://github.com/ssloy/tinyraytracer */
#include <stdint.h>
#include <math.h>
#include <stdlib.h>
#include "perf.h"
#include "io.h"
/*******************************************************************/
typedef int BOOL;
/* Returns x when x > y holds, otherwise y (y is also returned when the
 * comparison is false because of a NaN operand). */
static inline float max(float x, float y) {
    if (x > y) {
        return x;
    }
    return y;
}
/* Returns x when x < y holds, otherwise y. */
static inline float min(float x, float y) {
    if (x < y) {
        return x;
    }
    return y;
}
/*******************************************************************/
// If you want to adapt tinyraytracer to your own platform, there are
// mostly two macros and two functions to write:
// graphics_width
// graphics_height
// graphics_init()
// graphics_set_pixel()
//
// You can also write the following functions (or leave them empty if
// you do not need them):
// graphics_terminate()
// stats_begin_frame()
// stats_begin_pixel()
// stats_end_pixel()
// stats_end_frame()
// Size of the screen
// Replace with your own variables or values
// Benchmark
// - graphics deactivated (else UART waiting loop gives
// different results according to CPU freq / UART baud rate
// ratio).
// - smaller image size (for faster run in simulation)
static int graphics_width = 120;
static int graphics_height = 60;
static int bench_run=0;
// Two pixels per character using UTF8 character set
// (comment-out if terminal does not support it)
#define graphics_double_lines
// Replace with your own stuff to initialize graphics
// This version prepares an ANSI terminal with a single printf() call:
// black background, white foreground, cursor to the top-left corner,
// then clear the screen.
static inline void graphics_init() {
printf("\033[48;5;16m" // set background color black
"\033[38;5;15m" // set foreground color white
"\033[H" // home
"\033[2J"); // clear screen
}
// Replace with your own stuff to terminate graphics or leave empty
// This version only restores the terminal colors (black background,
// white foreground).
// NOTE(review): this comment used to say that <ctrl><D> is sent to the UART
// to exit the Verilator simulation (captured in RTL/DEVICES/uart.v); the
// code below does not emit that character.
static inline void graphics_terminate() {
printf("\033[48;5;16m" // set background color black
"\033[38;5;15m" // set foreground color white
);
}
// Replace with your own code.
// Draws one pixel on an ANSI terminal using 24-bit color escape sequences.
// Components are clamped to [0,1] and converted to 0..255.
// During a bench run nothing is drawn: only a progress number (y/2) is
// printed at the end of every odd line.
void graphics_set_pixel(int x, int y, float r, float g, float b) {
    const uint8_t R = (uint8_t)(255.0f * max(0.0f, min(1.0f, r)));
    const uint8_t G = (uint8_t)(255.0f * max(0.0f, min(1.0f, g)));
    const uint8_t B = (uint8_t)(255.0f * max(0.0f, min(1.0f, b)));
    const int end_of_line = (x == graphics_width-1);
    const int odd_line = (y & 1);

    // graphics output deactivated for bench run
    if(bench_run) {
        if(odd_line && end_of_line) {
            printf("%d",y/2);
        }
        return;
    }

#ifdef graphics_double_lines
    // Two pixel rows share one character cell: the even row is only
    // remembered, the odd row right below it does the actual output.
    static uint8_t upper_R=0;
    static uint8_t upper_G=0;
    static uint8_t upper_B=0;

    if(!odd_line) {
        upper_R = R;
        upper_G = G;
        upper_B = B;
        return;
    }

    if((R == upper_R) && (G == upper_G) && (B == upper_B)) {
        // both halves have the same color: a plain colored space is enough
        printf("\033[48;2;%d;%d;%dm ",(int)R,(int)G,(int)B);
    } else {
        // background = upper pixel, foreground = lower pixel
        printf("\033[48;2;%d;%d;%dm",(int)upper_R,(int)upper_G,(int)upper_B);
        printf("\033[38;2;%d;%d;%dm",(int)R,(int)G,(int)B);
        // UTF-8 encoding of U+2583, a block glyph filling the lower part
        // of the cell
        // https://www.w3.org/TR/xml-entity-names/025.html
        // https://onlineunicodetools.com/convert-unicode-to-utf8
        printf("\xE2\x96\x83");
    }
    if(end_of_line) {
        printf("\033[38;2;0;0;0m");
        printf("\033[48;2;0;0;0m\n");
    }
#else
    printf("\033[48;2;%d;%d;%dm ",(int)R,(int)G,(int)B);
    if(end_of_line) {
        printf("\033[48;2;0;0;0m\n");
    }
#endif
}
// Begins statistics collection for current pixel
// Leave emtpy if not needed.
// There are these two levels because on some
// femtorv32 cores (quark, tachyon), the clock tick counter does not
// have sufficient bits and will wrap during the time taken by
// rendering a frame (up to several minutes).
/* Per-pixel statistics hook (start). Intentionally empty here.
 * The return type is spelled out: relying on implicit 'int' is not valid
 * since C99. */
static inline void stats_begin_pixel() {
}
// Ends statistics collection for current pixel
// Leave empty if not needed.
static inline void stats_end_pixel() {
}
// Prints kx/1000 as a decimal number with exactly three fractional digits
// (kx is a value expressed in thousandths).
static void printk(uint64_t kx) {
    const int whole = (int)(kx / 1000);
    const int thousandths = (int)(kx % 1000);

    printf("%d.",whole);
    // left-pad the fractional part with zeros up to three digits
    for(int threshold = 100; threshold >= 10; threshold /= 10) {
        if(thousandths < threshold) {
            printf("0");
        }
    }
    printf("%d",thousandths);
}
static uint64_t instret_start;
static uint64_t cycles_start;
// Begins statistics collection for current frame.
// Leave emtpy if not needed.
/* Records the instruction and cycle counters at the start of a frame.
 * Return type made explicit (implicit 'int' is not valid since C99). */
static inline void stats_begin_frame() {
    instret_start = rdinstret();
    cycles_start = rdcycle();
}
// Ends statistics collection for current frame
// and displays result.
// Leave emtpy if not needed.
/*
 * Computes and prints the statistics of the frame started by
 * stats_begin_frame():
 *   CPI       = cycles / instructions
 *   RAYSTONES = pixels * 1e6 / cycles
 * Both are computed in thousandths and printed with printk().
 * If a counter difference is zero (e.g. a core without that counter), the
 * corresponding figure is reported as 0 instead of dividing by zero.
 */
static inline void stats_end_frame() {
    graphics_terminate();
    uint64_t instret = rdinstret() - instret_start;
    uint64_t cycles = rdcycle() - cycles_start;
    uint64_t kCPI = (instret != 0) ? cycles*1000/instret : 0;
    uint64_t pixels = (uint64_t)graphics_width * (uint64_t)graphics_height;
    uint64_t kRAYSTONES = (cycles != 0) ? (pixels*1000000000)/cycles : 0;
    printf(
        "\n%dx%d %s ",
        graphics_width,graphics_height,
        bench_run ?
            "no gfx output (measurement is accurate)" :
            "gfx output (measurement is NOT accurate)"
    );
    printf("CPI="); printk(kCPI); printf(" ");
    printf("RAYSTONES="); printk(kRAYSTONES);
    printf("\n");
}
// Normally you will not need to modify anything beyond that point.
/*******************************************************************/
typedef struct { float x,y,z; } vec3;
typedef struct { float x,y,z,w; } vec4;
static inline vec3 make_vec3(float x, float y, float z) {
vec3 V;
V.x = x; V.y = y; V.z = z;
return V;
}
static inline vec4 make_vec4(float x, float y, float z, float w) {
vec4 V;
V.x = x; V.y = y; V.z = z; V.w = w;
return V;
}
static inline vec3 vec3_neg(vec3 V) {
return make_vec3(-V.x, -V.y, -V.z);
}
static inline vec3 vec3_add(vec3 U, vec3 V) {
return make_vec3(U.x+V.x, U.y+V.y, U.z+V.z);
}
static inline vec3 vec3_sub(vec3 U, vec3 V) {
return make_vec3(U.x-V.x, U.y-V.y, U.z-V.z);
}
static inline float vec3_dot(vec3 U, vec3 V) {
return U.x*V.x+U.y*V.y+U.z*V.z;
}
static inline vec3 vec3_scale(float s, vec3 U) {
return make_vec3(s*U.x, s*U.y, s*U.z);
}
static inline float vec3_length(vec3 U) {
return sqrtf(U.x*U.x+U.y*U.y+U.z*U.z);
}
static inline vec3 vec3_normalize(vec3 U) {
return vec3_scale(1.0f/vec3_length(U),U);
}
/*************************************************************************/
/* A light source: where it is and how strong it is. */
typedef struct Light {
    vec3 position;
    float intensity;
} Light;

/* Builds a Light from its position and intensity. */
Light make_Light(vec3 position, float intensity) {
    return (Light){ .position = position, .intensity = intensity };
}
/*************************************************************************/
/* Surface description used by cast_ray(): the four albedo components
 * weight the diffuse, specular, reflected and refracted contributions
 * (x, y, z, w in that order). */
typedef struct {
    float refractive_index;
    vec4 albedo;
    vec3 diffuse_color;
    float specular_exponent;
} Material;

/* Builds a Material from refractive index r, albedo a, diffuse color and
 * specular exponent spec. */
Material make_Material(float r, vec4 a, vec3 color, float spec) {
    return (Material){
        .refractive_index = r,
        .albedo = a,
        .diffuse_color = color,
        .specular_exponent = spec
    };
}

/* Default material: refractive index 1, albedo (1,0,0,0), black diffuse
 * color, specular exponent 0. */
Material make_Material_default() {
    return (Material){
        .refractive_index = 1,
        .albedo = make_vec4(1,0,0,0),
        .diffuse_color = make_vec3(0,0,0),
        .specular_exponent = 0
    };
}
/*************************************************************************/
/* A sphere of the scene, with the material it is made of. */
typedef struct {
    vec3 center;
    float radius;
    Material material;
} Sphere;

/* Builds a Sphere from center c, radius r and material M. */
Sphere make_Sphere(vec3 c, float r, Material M) {
    return (Sphere){ .center = c, .radius = r, .material = M };
}
/*
 * Ray/sphere intersection test.
 * orig, dir: ray origin and direction (the formulas treat dir as unit length).
 * On a hit, returns 1 and stores in *t0 the distance along the ray to the
 * first intersection that is not behind the origin. Returns 0 when the ray
 * misses the sphere or when both intersections are behind the origin
 * (*t0 has been written in that second case).
 */
BOOL Sphere_ray_intersect(Sphere* S, vec3 orig, vec3 dir, float* t0) {
    const vec3 to_center = vec3_sub(S->center, orig);
    /* distance along the ray to the point closest to the center */
    const float along = vec3_dot(to_center,dir);
    /* squared distance from the center to the ray */
    const float dist2 = vec3_dot(to_center,to_center) - along*along;
    const float radius2 = S->radius*S->radius;
    if (dist2 > radius2) {
        return 0;
    }
    const float half_chord = sqrtf(radius2 - dist2);
    const float t_near = along - half_chord;
    const float t_far = along + half_chord;
    /* if the near intersection is behind the origin, fall back to the far one */
    *t0 = (t_near < 0) ? t_far : t_near;
    return (*t0 < 0) ? 0 : 1;
}
/* Mirror reflection of incident direction I about normal N:
 * I - 2 (I.N) N. */
vec3 reflect(vec3 I, vec3 N) {
    const float twice_dot = 2.f*vec3_dot(I,N);
    return vec3_sub(I, vec3_scale(twice_dot,N));
}
/*
 * Refraction of incident direction I at a surface of normal N (Snell's law).
 * eta_t: refractive index of the medium being entered,
 * eta_i: refractive index of the medium the ray comes from (callers pass 1).
 */
vec3 refract(vec3 I, vec3 N, float eta_t, float eta_i /* =1.f */) {
    const float cosi = -max(-1.f, min(1.f, vec3_dot(I,N)));
    if (cosi<0) {
        // the ray comes from inside the object: flip the normal and swap
        // the two media
        return refract(I, vec3_neg(N), eta_i, eta_t);
    }
    const float eta = eta_i / eta_t;
    const float k = 1 - eta*eta*(1 - cosi*cosi);
    if (k<0) {
        // total reflection, no ray to refract: an arbitrary direction is
        // returned, this has no physical meaning
        return make_vec3(1,0,0);
    }
    return vec3_add(vec3_scale(eta,I),vec3_scale((eta*cosi - sqrtf(k)),N));
}
/*
 * Finds the closest intersection of the ray (orig, dir) with the scene:
 * the spheres first, then the checkerboard floor.
 * On a hit, *hit receives the intersection point, *N the surface normal
 * and *material the material at that point.
 * Returns non-zero if something was hit at a distance below 1000.
 * Note: on a checkerboard hit only material->diffuse_color is written;
 * the other fields of *material keep whatever value they had before.
 */
BOOL scene_intersect(
vec3 orig, vec3 dir, Sphere* spheres, int nb_spheres,
vec3* hit, vec3* N, Material* material
) {
/* 1e30 stands for "nothing hit yet" */
float spheres_dist = 1e30;
for(int i=0; i<nb_spheres; ++i) {
float dist_i;
if(
Sphere_ray_intersect(&spheres[i], orig, dir, &dist_i) &&
(dist_i < spheres_dist)
) {
/* closer than anything found so far: record this sphere */
spheres_dist = dist_i;
*hit = vec3_add(orig,vec3_scale(dist_i,dir));
*N = vec3_normalize(vec3_sub(*hit, spheres[i].center));
*material = spheres[i].material;
}
}
float checkerboard_dist = 1e30;
/* rays (nearly) parallel to the floor are ignored */
if (fabs(dir.y)>1e-3) {
float d = -(orig.y+4)/dir.y; // the checkerboard plane has equation y = -4
vec3 pt = vec3_add(orig, vec3_scale(d,dir));
/* the floor is limited to |x| < 10 and -30 < z < -10, and only counts
   if it is closer than the closest sphere */
if (d>0 && fabs(pt.x)<10 && pt.z<-10 && pt.z>-30 && d<spheres_dist) {
checkerboard_dist = d;
*hit = pt;
*N = make_vec3(0,1,0);
/* alternate between two colors according to the parity of the cell */
material->diffuse_color =
(((int)(.5*hit->x+1000) + (int)(.5*hit->z)) & 1)
? make_vec3(.3, .3, .3)
: make_vec3(.3, .2, .1);
}
}
return min(spheres_dist, checkerboard_dist)<1000;
}
/*
 * Computes the color seen along the ray (orig, dir).
 * Recursion: a reflected and a refracted ray are cast from every hit point,
 * with depth + 1; rays with depth > 2 return the background color.
 * The result combines, weighted by the four albedo components of the
 * material: diffuse lighting, specular highlights, the reflected color and
 * the refracted color.
 */
vec3 cast_ray(
vec3 orig, vec3 dir, Sphere* spheres, int nb_spheres,
Light* lights, int nb_lights, int depth /* =0 */
) {
vec3 point,N;
Material material = make_Material_default();
if (
depth>2 ||
!scene_intersect(orig, dir, spheres, nb_spheres, &point, &N, &material)
) {
/* background: a gradient that depends on the vertical direction only */
float s = 0.5*(dir.y + 1.0);
return vec3_add(
vec3_scale(s,make_vec3(0.2, 0.7, 0.8)),
vec3_scale(s,make_vec3(0.0, 0.0, 0.5))
);
}
vec3 reflect_dir=vec3_normalize(reflect(dir, N));
vec3 refract_dir=vec3_normalize(refract(dir,N,material.refractive_index,1));
// offset the original point to avoid occlusion by the object itself
vec3 reflect_orig =
vec3_dot(reflect_dir,N) < 0
? vec3_sub(point,vec3_scale(1e-3,N))
: vec3_add(point,vec3_scale(1e-3,N));
vec3 refract_orig =
vec3_dot(refract_dir,N) < 0
? vec3_sub(point,vec3_scale(1e-3,N))
: vec3_add(point,vec3_scale(1e-3,N));
vec3 reflect_color = cast_ray(
reflect_orig, reflect_dir, spheres, nb_spheres,
lights, nb_lights, depth + 1
);
vec3 refract_color = cast_ray(
refract_orig, refract_dir, spheres, nb_spheres,
lights, nb_lights, depth + 1
);
/* accumulate the contribution of every light that is not shadowed */
float diffuse_light_intensity = 0, specular_light_intensity = 0;
for (int i=0; i<nb_lights; i++) {
vec3 light_dir = vec3_normalize(vec3_sub(lights[i].position,point));
float light_distance = vec3_length(vec3_sub(lights[i].position,point));
/* same 1e-3 offset along the normal as above, on the side of the light */
vec3 shadow_orig =
vec3_dot(light_dir,N) < 0
? vec3_sub(point,vec3_scale(1e-3,N))
: vec3_add(point,vec3_scale(1e-3,N)) ;
// checking if the point lies in the shadow of the lights[i]
vec3 shadow_pt, shadow_N;
Material tmpmaterial;
if (
scene_intersect(
shadow_orig, light_dir, spheres, nb_spheres,
&shadow_pt, &shadow_N, &tmpmaterial
) && (
vec3_length(vec3_sub(shadow_pt,shadow_orig)) < light_distance
)
) continue ;
diffuse_light_intensity +=
lights[i].intensity * max(0.f, vec3_dot(light_dir,N));
/* abc: cosine between the reflected light direction and the view ray,
   def: specular exponent; powf() is only called when both are > 0 */
float abc = max(
0.f, vec3_dot(vec3_neg(reflect(vec3_neg(light_dir), N)),dir)
);
float def = material.specular_exponent;
if(abc > 0.0f && def > 0.0f) {
specular_light_intensity += powf(abc,def)*lights[i].intensity;
}
}
/* albedo.x: diffuse, albedo.y: specular (white), albedo.z: reflection,
   albedo.w: refraction */
vec3 result = vec3_scale(
diffuse_light_intensity * material.albedo.x, material.diffuse_color
);
result = vec3_add(
result, vec3_scale(specular_light_intensity * material.albedo.y,
make_vec3(1,1,1))
);
result = vec3_add(result, vec3_scale(material.albedo.z, reflect_color));
result = vec3_add(result, vec3_scale(material.albedo.w, refract_color));
return result;
}
/*
 * Renders pixel (i,j): casts a primary ray from the origin through the
 * center of the pixel (field of view of pi/3) and sends the resulting color
 * to graphics_set_pixel(). The work is bracketed by the per-pixel
 * statistics hooks.
 */
static inline void render_pixel(
    int i, int j, Sphere* spheres, int nb_spheres, Light* lights, int nb_lights
) {
    const float fov = M_PI/3.;
    stats_begin_pixel();

    const float ray_x = (i + 0.5) - graphics_width/2.;
    const float ray_y = -(j + 0.5) + graphics_height/2.; // this flips the image.
    const float ray_z = -graphics_height/(2.*tan(fov/2.));
    const vec3 eye = make_vec3(0,0,0);
    const vec3 ray_dir = vec3_normalize(make_vec3(ray_x, ray_y, ray_z));

    const vec3 color = cast_ray(
        eye, ray_dir, spheres, nb_spheres, lights, nb_lights, 0
    );
    graphics_set_pixel(i,j,color.x,color.y,color.z);

    stats_end_pixel();
}
/*
 * Renders a whole frame, bracketed by the per-frame statistics hooks.
 * With graphics_double_lines defined, rows are processed two at a time and
 * the two pixels of a column (row, row+1) are rendered back to back, as
 * expected by graphics_set_pixel().
 */
void render(Sphere* spheres, int nb_spheres, Light* lights, int nb_lights) {
#ifdef graphics_double_lines
    const int row_step = 2;
#else
    const int row_step = 1;
#endif
    stats_begin_frame();
    for (int row = 0; row<graphics_height; row += row_step) {
        for (int col = 0; col<graphics_width; col++) {
            render_pixel(col,row,spheres,nb_spheres,lights,nb_lights);
#ifdef graphics_double_lines
            render_pixel(col,row+1,spheres,nb_spheres,lights,nb_lights);
#endif
        }
    }
    stats_end_frame();
}
int nb_spheres = 4;
Sphere spheres[4];
int nb_lights = 3;
Light lights[3];
/* Fills the global scene description: three lights and four spheres, each
 * sphere with its own material. */
void init_scene() {
    /* lights: position, intensity */
    lights[0] = make_Light(make_vec3(-20, 20, 20), 1.5);
    lights[1] = make_Light(make_vec3( 30, 50, -25), 1.8);
    lights[2] = make_Light(make_vec3( 30, 20, 30), 1.7);

    /* materials: refractive index, albedo, diffuse color, specular exponent */
    const Material ivory = make_Material(
        1.0,
        make_vec4(0.6, 0.3, 0.1, 0.0),
        make_vec3(0.4, 0.4, 0.3),
        50.
    );
    const Material glass = make_Material(
        1.5,
        make_vec4(0.0, 0.5, 0.1, 0.8),
        make_vec3(0.6, 0.7, 0.8),
        125.
    );
    const Material red_rubber = make_Material(
        1.0,
        make_vec4(0.9, 0.1, 0.0, 0.0),
        make_vec3(0.3, 0.1, 0.1),
        10.
    );
    const Material mirror = make_Material(
        1.0,
        make_vec4(0.0, 10.0, 0.8, 0.0),
        make_vec3(1.0, 1.0, 1.0),
        142.
    );

    /* spheres: center, radius, material */
    spheres[0] = make_Sphere(make_vec3(-3, 0, -16), 2, ivory);
    spheres[1] = make_Sphere(make_vec3(-1.0, -1.5, -12), 2, glass);
    spheres[2] = make_Sphere(make_vec3( 1.5, -0.5, -18), 3, red_rubber);
    spheres[3] = make_Sphere(make_vec3( 7, 5, -18), 4, mirror);
}
/*
 * Renders the scene twice:
 *  1. a small 40x20 frame with graphic output disabled (bench_run = 1),
 *     used for the measurement,
 *  2. the full 120x60 frame with graphic output.
 * A different value is written to the LEDs before each pass and at the end
 * (5, 10, then 15).
 */
int main() {
init_scene();
graphics_init();
/* pass 1: benchmark, no graphic output */
IO_OUT(IO_LEDS,5);
bench_run = 1;
graphics_width = 40;
graphics_height = 20;
printf("Running without graphic output (for accurate measurement)...\n");
render(spheres, nb_spheres, lights, nb_lights);
/* pass 2: full-size frame with graphic output */
IO_OUT(IO_LEDS,10);
bench_run = 0;
graphics_width = 120;
graphics_height = 60;
render(spheres, nb_spheres, lights, nb_lights);
IO_OUT(IO_LEDS,15);
graphics_terminate();
return 0;
}

14
FIRMWARE/read_spiflash.c Normal file
View File

@@ -0,0 +1,14 @@
#include "io.h"
#define SPI_FLASH_BASE ((char*)(1 << 23))
/*
 * Dumps the first 32 bytes found at SPI_FLASH_BASE as sixteen 16-bit values
 * (low byte first), each printed as four hexadecimal digits. The loop index
 * is shown on the LEDs.
 *
 * Each byte is masked to 0..255 before being combined: SPI_FLASH_BASE is a
 * plain 'char*', and where 'char' is signed a byte >= 0x80 would otherwise
 * sign-extend and overwrite the high byte of the printed value.
 */
int main() {
    for(int i=0; i<16; ++i) {
        IO_OUT(IO_LEDS,i);
        int lo = SPI_FLASH_BASE[2*i  ] & 0xFF;
        int hi = SPI_FLASH_BASE[2*i+1] & 0xFF;
        print_hex_digits((hi << 8) | lo,4); // print four hexadecimal digits
        printf(" ");
    }
    printf("\n");
    return 0;
}

135
FIRMWARE/riscv_logo.c Normal file
View File

@@ -0,0 +1,135 @@
/*
* FEMTORV32 - FEMTOSOC demo program:
* Displaying a rotating RISCV logo
*/
#include <stdio.h>
#ifdef __linux__
#include <unistd.h>
#endif
/* The RISCV logo, with a tiny resolution
* (remember, I only got 4Kb of RAM
* on the IceStick !)
*/
unsigned char logo[16][16] = {
{7,7,7,7,7,7,5,3,3,3,3,3,3,3,3,7},
{7,7,7,7,7,7,7,5,3,3,3,3,3,3,3,7},
{1,1,1,1,2,7,7,7,3,3,3,3,3,3,3,7},
{0,0,0,0,0,1,7,7,5,3,3,3,3,3,3,7},
{0,0,0,0,0,0,7,7,6,3,3,3,3,3,6,7},
{0,0,0,0,0,0,7,7,5,3,3,3,3,4,7,7},
{0,0,0,0,0,2,7,7,4,3,3,3,3,7,7,7},
{0,2,2,2,7,7,7,6,3,3,3,3,6,7,7,7},
{0,7,7,7,7,7,6,3,3,3,3,5,7,7,2,7},
{0,1,7,7,7,4,3,3,3,3,3,7,7,7,0,7},
{0,0,2,7,7,6,3,3,3,3,6,7,7,1,0,7},
{0,0,0,2,7,7,5,3,3,5,7,7,2,0,0,7},
{0,0,0,0,7,7,7,5,4,7,7,2,0,0,0,7},
{0,0,0,0,0,7,7,7,7,7,7,0,0,0,0,7},
{0,0,0,0,0,1,7,7,7,7,1,0,0,0,0,7},
{7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7}
};
/*
* ANSI color codes:
* https://stackoverflow.com/questions/4842424/list-of-ansi-color-escape-sequences
*/
#define ANSIRGB(R,G,B) "\033[48;2;" #R ";" #G ";" #B "m "
#define ANSICOL(C) "\033[" #C "m "
/*
* The colormap.
*/
/*
* This ones corresponds to the official RISC-V logo,
* but uses more bandwidth (full RGB ANSI codes)
*/
/*
const char* cmap[8] = {
ANSIRGB(040,051,116),
ANSIRGB(123,128,155),
ANSIRGB(170,172,188),
ANSIRGB(249,177,021),
ANSIRGB(249,190,101),
ANSIRGB(249,199,130),
ANSIRGB(252,216,176),
ANSIRGB(250,251,248)
};
*/
/* more compact colormap */
const char* cmap[8] = {
ANSICOL(44),
ANSICOL(104),
ANSICOL(47),
ANSICOL(102),
ANSICOL(103),
ANSICOL(103),
ANSICOL(103),
ANSICOL(107)
};
/*
* Generated by TOOLS/make_sintab.c
*/
int sintab[64] = {
0,25,49,74,97,120,142,162,181,197,212,225,236,244,251,254,
256,254,251,244,236,225,212,197,181,162,142,120,97,74,49,25,
0,-25,-49,-74,-97,-120,-142,-162,-181,-197,-212,-225,-236,-244,
-251,-254,-256,-254,-251,-244,-236,-225,-212,-197,-181,-162,
-142,-120,-97,-74,-49,-25
};
#define GL_width 40
#define GL_height 40
void main() {
int frame = 0;
int last_col = -1;
for(;;) {
printf("\033[H"); // reset cursor position
int scaling = (sintab[frame&63]+400) << 1;
int Ux = scaling*sintab[frame & 63];
int Uy = scaling*sintab[(frame + 16) & 63];
int Vx = -Uy;
int Vy = Ux;
int X0 = -(GL_width/2)*(Ux+Vx);
int Y0 = -(GL_height/2)*(Uy+Vy);
for(int y=0; y<GL_height; ++y) {
int X = X0;
int Y = Y0;
for(int x=0; x<GL_width; ++x) {
unsigned char col = logo[(Y >> 18)&15][(X >> 18)&15];
printf(col == last_col ? " " : cmap[col]);
last_col = col;
X += Ux;
Y += Uy;
}
printf("\033[49m\n"); // reset color to black and newline
last_col = -1;
X0 += Vx;
Y0 += Vy;
}
++frame;
#ifdef __linux__
usleep(20000);
#endif
// if(frame > 20) break;
}
}

106
FIRMWARE/sieve.c Normal file
View File

@@ -0,0 +1,106 @@
// Taken from picorv32
//
// This is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
// A simple Sieve of Eratosthenes
#include <stdio.h>
#include <stdint.h>
/*************************************************************************/
// Note: if this is changed, then checksum need
// to be updated as well.
#define BITMAP_SIZE 64
typedef int bool;
static uint32_t bitmap[BITMAP_SIZE/32];
static uint32_t hash;
/* One step of the XOR version of the DJB2 hash: (a * 33) ^ b, with the
 * multiplication written as a shift and an add. */
static uint32_t mkhash(uint32_t a, uint32_t b)
{
    const uint32_t a_times_32 = a << 5;
    const uint32_t a_times_33 = a_times_32 + a;
    return a_times_33 ^ b;
}
/* Sets bit number idx of the bitmap (32 bits per word).
 * The mask is built from an unsigned 32-bit one: shifting the signed
 * constant 1 by 31 (idx % 32 == 31) is undefined behaviour in C. */
static void bitmap_set(int idx)
{
    bitmap[idx/32] |= UINT32_C(1) << (idx % 32);
}
/* Returns 1 if bit number idx of the bitmap is set, 0 otherwise.
 * The mask is built from an unsigned 32-bit one: shifting the signed
 * constant 1 by 31 (idx % 32 == 31) is undefined behaviour in C. */
static bool bitmap_get(int idx)
{
    return (bitmap[idx/32] & (UINT32_C(1) << (idx % 32))) != 0;
}
/*
 * Prints one line of the form "<idx><suffix> prime: <val>", with idx padded
 * with a space below 10 and an English ordinal suffix (st/nd/rd/th; 10..19
 * always get "th"). Both idx and val are then folded into the global hash.
 */
static void print_prime(int idx, int val)
{
    const int is_teen = (idx / 10 == 1);
    const int units = idx % 10;

    if (idx < 10)
        printf(" ");
    printf("%d",idx);

    if (!is_teen && units == 1)
        printf("st");
    else if (!is_teen && units == 2)
        printf("nd");
    else if (!is_teen && units == 3)
        printf("rd");
    else
        printf("th");

    printf(" prime: %d\n",val);
    hash = mkhash(hash, idx);
    hash = mkhash(hash, val);
}
/*
 * Sieve of Eratosthenes over the odd numbers 3, 5, 7, ...
 * Bit i of the bitmap stands for the number 3+2*i; a set bit means
 * "composite". Every prime found is printed with print_prime(), which also
 * updates the global hash; the hash is finally compared with the expected
 * checksum for the current BITMAP_SIZE.
 */
void sieve(void)
{
int idx = 1;
/* initial value of the DJB2 hash */
hash = 5381;
/* 2 is the only even prime and is not represented in the bitmap */
print_prime(idx++, 2);
for (int i = 0; i < BITMAP_SIZE; i++) {
if (bitmap_get(i))
continue;
/* bit i is still clear: 3+2*i is prime */
print_prime(idx++, 3+2*i);
/* mark its multiples, starting at twice the prime */
for (int j = 2*(3+2*i);; j += 3+2*i) {
/* even multiples have no bit in the bitmap; 'continue' still executes
   the increment of the for statement, so the loop moves on */
if (j%2 == 0)
continue;
int k = (j-3)/2;
/* past the end of the bitmap: done with this prime */
if (k >= BITMAP_SIZE)
break;
bitmap_set(k);
}
}
printf("checksum:\n %x",hash);
if (hash == 0x1772A48F) {
printf(" OK\n");
} else {
printf(" ERROR\n");
}
}
/* Runs the sieve forever, calling wait() ten times between two runs. */
int main(void)
{
    while (1) {
        sieve();
        int pauses_left = 10;
        while (pauses_left-- > 0) {
            wait();
        }
    }
    return 0;
}

10
FIRMWARE/spiflash0.ld Normal file
View File

@@ -0,0 +1,10 @@
/*
 * Linker script that places the whole program (code and data) in a single
 * output section located in the FLASH region.
 */
MEMORY {
FLASH (RX) : ORIGIN = 0x00820000, LENGTH = 0x100000 /* 0x100000 bytes = 1 MB in flash. NOTE(review): this comment used to say 4 MB -- confirm the intended size */
}
SECTIONS {
everything : {
. = ALIGN(4);
/* the .text of start.o comes first, so it sits at the region origin */
start.o (.text)
/* then every remaining input section whose name starts with a dot */
*(.*)
} >FLASH
}

60
FIRMWARE/spiflash1.ld Normal file
View File

@@ -0,0 +1,60 @@
/* Linker script for programs stored in SPI flash */
/* Inspired from picorv32/picosoc/sections.lds */
/* */
/* text and rodata sections are sent to flash */
/* bss sections are sent to BRAM */
/* data sections are sent to BRAM and have */
/* initialization data in flash. */
/* AT keyword specifies LMA (Load Memory Address) */
MEMORY {
FLASH (rx) : ORIGIN = 0x00820000, LENGTH = 0x100000 /* 0x100000 bytes = 1 MB in flash. NOTE(review): this comment used to say 4 MB -- confirm the intended size */
RAM (rwx) : ORIGIN = 0x00000000, LENGTH = 0x1800 /* 6 kB in RAM */
}
SECTIONS {
/*
* This is the initialized data and fastcode section
* The program executes knowing that the data is in the RAM
* but the loader puts the initial values in the FLASH (inidata).
* It is one task of the startup (crt0_spiflash.S) to copy the initial values from FLASH to RAM.
*/
.data : AT ( _sidata ) {
. = ALIGN(4);
_sdata = .; /* create a global symbol at data start; used by startup code in order to initialise the .data section in RAM */
_ram_start = .; /* create a global symbol at ram start (e.g., for garbage collector) */
/* Initialized data */
*(.data*)
*(.sdata*)
. = ALIGN(4);
_edata = .; /* define a global symbol at data end; used by startup code in order to initialise the .data section in RAM */
} > RAM
/* The (non fastcode) program code and other data goes into FLASH */
.text : {
    . = ALIGN(4);
    start_spiflash1.o(.text) /* c runtime initialization (code) */
    *(.text*) /* .text* sections (code) */
    . = ALIGN(4);
    *(.rodata*) /* .rodata* sections (constants, strings, etc.) */
    *(.srodata*) /* .srodata* sections (small constants) */
    /*
     * Word-align the end of the section: _sidata is the flash address the
     * startup code copies the initialized data from, and that copy uses
     * 32-bit loads. Read-only data (e.g. strings) can end on any byte.
     */
    . = ALIGN(4);
    _etext = .; /* define a global symbol at end of code */
    _sidata = _etext; /* This is used by the startup in order to initialize the .data section */
} >FLASH
/* Uninitialized data section */
.bss : {
. = ALIGN(4);
_sbss = .; /* define a global symbol at bss start; used by startup code */
*(.bss*)
*(.sbss*)
*(COMMON)
. = ALIGN(4);
_ebss = .; /* define a global symbol at bss end; used by startup code */
} >RAM
}

80
FIRMWARE/spiflash2.ld Normal file
View File

@@ -0,0 +1,80 @@
/* Linker script for programs stored in SPI flash */
/* Inspired from picorv32/picosoc/sections.lds */
/* */
/* text and rodata sections are sent to flash */
/* bss sections are sent to BRAM */
/* data sections are sent to BRAM and have */
/* initialization data in flash. */
/* AT keyword specifies LMA (Load Memory Address) */
MEMORY {
FLASH (rx) : ORIGIN = 0x00820000, LENGTH = 0x100000 /* 0x100000 bytes = 1 MB in flash. NOTE(review): this comment used to say 4 MB -- confirm the intended size */
RAM (rwx) : ORIGIN = 0x00000000, LENGTH = 0x1800 /* 6 kB in RAM */
}
SECTIONS {
/*
* This is the initialized data and fastcode section
* The program executes knowing that the data is in the RAM
* but the loader puts the initial values in the FLASH (inidata).
* It is one task of the startup (crt0_spiflash.S) to copy the initial values from FLASH to RAM.
*/
.data_and_fastcode : AT ( _sidata ) {
. = ALIGN(4);
_sdata = .; /* create a global symbol at data start; used by startup code in order to initialise the .data section in RAM */
_ram_start = .; /* create a global symbol at ram start (e.g., for garbage collector) */
/* Initialized data */
*(.data*)
*(.sdata*)
/* integer mul and div */
*/libgcc.a:muldi3.o(.text)
*/libgcc.a:div.o(.text)
putchar.o(.text)
print.o(.text)
/* functions with attribute((section(".fastcode"))) */
*(.fastcode*)
. = ALIGN(4);
_edata = .; /* define a global symbol at data end; used by startup code in order to initialise the .data section in RAM */
} > RAM
/* The (non fastcode) program code and other data goes into FLASH */
.text : {
    . = ALIGN(4);
    start_spiflash1.o(.text) /* c runtime initialization (code) */
    /*
     * I do not understand why, but if I do not put this section, I got
     * an overlapping sections error with some programs (for instance pi.c
     * or C++ programs)
     */
    *(.eh_frame)
    *(.eh_frame_hdr)
    *(.init_array)
    *(.gcc_except_table*)
    *(.text*) /* .text* sections (code) */
    . = ALIGN(4);
    *(.rodata*) /* .rodata* sections (constants, strings, etc.) */
    *(.srodata*) /* .srodata* sections (small constants) */
    /*
     * Word-align the end of the section: _sidata is the flash address the
     * startup code copies the initialized data from, and that copy uses
     * 32-bit loads. Read-only data (e.g. strings) can end on any byte.
     */
    . = ALIGN(4);
    _etext = .; /* define a global symbol at end of code */
    _sidata = _etext; /* This is used by the startup in order to initialize the .data section */
} >FLASH
/* Uninitialized data section */
.bss : {
. = ALIGN(4);
_sbss = .; /* define a global symbol at bss start; used by startup code */
*(.bss*)
*(.sbss*)
*(COMMON)
. = ALIGN(4);
_ebss = .; /* define a global symbol at bss end; used by startup code */
} >RAM
}

87
FIRMWARE/spiflash3.ld Normal file
View File

@@ -0,0 +1,87 @@
/* Linker script for programs stored in SPI flash */
/* Inspired from picorv32/picosoc/sections.lds */
/* */
/* text and rodata sections are sent to flash */
/* bss sections are sent to BRAM */
/* data sections are sent to BRAM and have */
/* initialization data in flash. */
/* AT keyword specifies LMA (Load Memory Address) */
MEMORY {
FLASH (rx) : ORIGIN = 0x00820000, LENGTH = 0x100000 /* 0x100000 bytes = 1 MB in flash. NOTE(review): this comment used to say 4 MB -- confirm the intended size */
RAM (rwx) : ORIGIN = 0x00000000, LENGTH = 0x1800 /* 6 kB in RAM */
}
SECTIONS {
/*
* This is the initialized data and fastcode section
* The program executes knowing that the data is in the RAM
* but the loader puts the initial values in the FLASH (inidata).
* It is one task of the startup (crt0_spiflash.S) to copy the initial values from FLASH to RAM.
*/
.data_and_fastcode : AT ( _sidata ) {
. = ALIGN(4);
_sdata = .; /* create a global symbol at data start; used by startup code in order to initialise the .data section in RAM */
_ram_start = .; /* create a global symbol at ram start (e.g., for garbage collector) */
/* Initialized data */
*(.data*)
*(.sdata*)
/* integer mul and div */
*/libgcc.a:muldi3.o(.text)
*/libgcc.a:div.o(.text)
/* putchar.o(.text) */
/* functions with attribute((section(".fastcode"))) */
*(.fastcode*)
. = ALIGN(4);
_edata = .; /* define a global symbol at data end; used by startup code in order to initialise the .data section in RAM */
} > RAM
/* The (non fastcode) program code and other data goes into FLASH */
.text : {
    . = ALIGN(4);
    start_spiflash1.o(.text) /* c runtime initialization (code) */
    /*
     * I do not understand why, but if I do not put this section, I got
     * an overlapping sections error with some programs (for instance pi.c
     * or C++ programs)
     */
    *(.eh_frame)
    *(.eh_frame_hdr)
    *(.init_array*)
    *(.gcc_except_table*)
    *(.text*) /* .text* sections (code) */
    . = ALIGN(4);
    *(.rodata*) /* .rodata* sections (constants, strings, etc.) */
    *(.srodata*) /* .srodata* sections (small constants) */
    /*
     * Word-align the end of the section: _sidata is the flash address the
     * startup code copies the initialized data from, and that copy uses
     * 32-bit loads. Read-only data (e.g. strings) can end on any byte.
     */
    . = ALIGN(4);
    _etext = .; /* define a global symbol at end of code */
    _sidata = _etext; /* This is used by the startup in order to initialize the .data section */
} >FLASH
/* Uninitialized data section */
.bss : {
. = ALIGN(4);
_sbss = .; /* define a global symbol at bss start; used by startup code */
*(.bss*)
*(.sbss*)
*(COMMON)
. = ALIGN(4);
_ebss = .; /* define a global symbol at bss end; used by startup code */
} >RAM
/* this is to define the start of the heap, and make sure we have a minimum size */
.heap : {
. = ALIGN(4);
_heap_start = .; /* define a global symbol at heap start */
_end = .; /* as expected by syscalls.c */
} >RAM
}

9
FIRMWARE/start.S Normal file
View File

@@ -0,0 +1,9 @@
.equ IO_BASE, 0x400000
.section .text
.globl start
# Program entry point: sets up the two registers the C code relies on, then
# calls main().
#   gp = base address of the memory-mapped IO
#   sp = 0x1800 (6144); presumably the top of a 6 kB RAM -- confirm against
#        the memory size of the target
# If main() returns, ebreak is executed.
start:
li gp,IO_BASE
li sp,0x1800
call main
ebreak

View File

@@ -0,0 +1,9 @@
.equ IO_BASE, 0x400000
.section .text
.globl start
# Program entry point: sets up the two registers the C code relies on, then
# calls main().
#   gp = base address of the memory-mapped IO
#   sp = 0x20000; this matches the end of the DATARAM region of pipeline.ld
#        (0x10000 + 0x10000) -- presumably the script this file is linked
#        with, confirm
# If main() returns, ebreak is executed.
start:
li gp,IO_BASE
li sp,0x20000
call main
ebreak

View File

@@ -0,0 +1,43 @@
.equ IO_BASE, 0x400000
.text
.global _start
.type _start, @function
# C runtime startup for programs whose initialized data lives in flash:
#   1. gp = IO base address, sp = 0x1800
#   2. clear .bss            (_sbss .. _ebss)
#   3. copy .data from flash (_sidata) to RAM (_sdata .. _edata)
#   4. call main(); ebreak if it returns
# Both loops work one 32-bit word at a time, so the three RAM symbols and
# _sidata are expected to be 4-byte aligned.
# Clobbers a0-a3 before main() is entered.
_start:
# relaxation is switched off while gp is loaded, so that this instruction
# sequence is not itself rewritten by the linker into a gp-relative form
.option push
.option norelax
li gp,IO_BASE
.option pop
li sp,0x1800
# zero-init bss section:
# clears from _sbss to _ebss
# _sbss and _ebss are defined by linker script (spiflash.ld)
la a0, _sbss
la a1, _ebss
# nothing to clear if the section is empty
bge a0, a1, end_init_bss
loop_init_bss:
sw zero, 0(a0)
addi a0, a0, 4
blt a0, a1, loop_init_bss
end_init_bss:
# copy data section from SPI Flash to BRAM:
# copies from _sidata (in flash) to _sdata ... _edata (in BRAM)
# _sidata, _sdata and _edata are defined by linker script (spiflash.ld)
la a0, _sidata
la a1, _sdata
la a2, _edata
# nothing to copy if the section is empty
bge a1, a2, end_init_data
loop_init_data:
# a0 = source (flash), a1 = destination (RAM), a2 = end of destination
lw a3, 0(a0)
sw a3, 0(a1)
addi a0, a0, 4
addi a1, a1, 4
blt a1, a2, loop_init_data
end_init_data:
call main
ebreak

13
FIRMWARE/test_rdcycle.c Normal file
View File

@@ -0,0 +1,13 @@
#include "perf.h"
/*
 * Prints the cycle and retired-instruction counters 100 times (truncated to
 * int), then one last sample together with 100 * cycles / instructions.
 */
int main() {
    int i = 0;
    while (i < 100) {
        const uint64_t cycles_now = rdcycle();
        const uint64_t instret_now = rdinstret();
        printf("i=%d cycles=%d instret=%d\n", i, (int)cycles_now, (int)instret_now);
        ++i;
    }
    /* final sample: instructions first, then cycles */
    const uint64_t total_instret = rdinstret();
    const uint64_t total_cycles = rdcycle();
    printf("cycles=%d instret=%d 100CPI=%d\n", (int)total_cycles, (int)total_instret, (int)(100*total_cycles/total_instret));
}

17
FIRMWARE/test_spi_flash.c Normal file
View File

@@ -0,0 +1,17 @@
#include <stdio.h>
#include <stdint.h>
#define SPI_FLASH_BASE ((uint32_t*)(1 << 23))
/* Endlessly dumps the first 40 32-bit words found at SPI_FLASH_BASE.
   Each line shows the word index, the word in hexadecimal, and its four bytes
   (in memory order) printed as raw characters. Two blank lines separate
   successive dumps. Never returns. */
int main() {
   for(;;) {
      for(int i=0; i<40; ++i) {
         uint32_t word = SPI_FLASH_BASE[i];
         char* c = (char*)&word;
         /* uint32_t may be 'unsigned long' on 32-bit targets; cast so that the
            argument type really matches the %x conversion */
         printf("%d 0x%x %c%c%c%c\n", i, (unsigned)word, c[0],c[1],c[2],c[3]);
      }
      printf("\n");
      printf("\n");
   }
}

444
FIRMWARE/tinyraytracer.c Normal file
View File

@@ -0,0 +1,444 @@
/* A port of Dmitry Sokolov's tiny raytracer to C and to FemtoRV32 */
/* Displays on the small OLED display and/or HDMI */
/* Bruno Levy, 2020 */
/* Original tinyraytracer: https://github.com/ssloy/tinyraytracer */
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
/*******************************************************************/
typedef int BOOL;
/* Larger of two floats; returns y whenever the test x>y is false. */
static inline float max(float x, float y) {
   if (x > y) {
      return x;
   }
   return y;
}
/* Smaller of two floats; returns y whenever the test x<y is false. */
static inline float min(float x, float y) {
   if (x < y) {
      return x;
   }
   return y;
}
/*******************************************************************/
// If you want to adapt tinyraytracer to your own platform, there are
// mostly two macros and two functions to write:
// graphics_width
// graphics_height
// graphics_init()
// graphics_set_pixel()
//
// You can also write the following functions (or leave them empty if
// you do not need them):
// graphics_terminate()
// stats_begin_frame()
// stats_begin_pixel()
// stats_end_pixel()
// stats_end_frame()
// Size of the screen
// Replace with your own variables or values
#define graphics_width 120
#define graphics_height 60
// Two pixels per character using UTF8 character set
// (comment-out if terminal does not support it)
#define graphics_double_lines
// Prepares the output device for drawing (replace with your own code).
// This version targets an ANSI terminal and issues three escape
// sequences in one printf call.
static inline void graphics_init() {
   printf(
      "\033[48;5;16m"   // background color: black
      "\033[H"          // cursor to home position
      "\033[2J"         // clear the screen
   );
}
// Replace with your own stuff to terminate graphics or leave empty.
// NOTE(review): the previous comment stated that <ctrl><D> is sent to the UART
// here to exit the Verilator simulation (captured by RTL/DEVICES/uart.v), but
// this body is empty and sends nothing -- confirm which one is intended.
static inline void graphics_terminate() {
}
// Outputs one pixel (replace with your own code).
// r,g,b are clamped to [0,1] and quantized to 0..255, then sent to the
// terminal as 24-bit ANSI colors.
//
// With graphics_double_lines defined, one character cell carries two
// vertically stacked pixels: a call with even y only remembers the color,
// and the following call with odd y emits the cell (remembered color as
// background, current color as foreground glyph). This relies on the caller
// issuing (x,y) then (x,y+1) back to back.
// A newline (with colors reset to black) is emitted after the last column.
void graphics_set_pixel(int x, int y, float r, float g, float b) {
   const uint8_t R = (uint8_t)(255.0f * max(0.0f, min(1.0f, r)));
   const uint8_t G = (uint8_t)(255.0f * max(0.0f, min(1.0f, g)));
   const uint8_t B = (uint8_t)(255.0f * max(0.0f, min(1.0f, b)));
   const int last_column = (x == graphics_width-1);
#ifdef graphics_double_lines
   // color remembered from the most recent even-y call
   static uint8_t saved_R=0;
   static uint8_t saved_G=0;
   static uint8_t saved_B=0;
   if(!(y&1)) {
      saved_R = R;
      saved_G = G;
      saved_B = B;
      return;
   }
   if((R != saved_R) || (G != saved_G) || (B != saved_B)) {
      printf("\033[48;2;%d;%d;%dm",(int)saved_R,(int)saved_G,(int)saved_B);
      printf("\033[38;2;%d;%d;%dm",(int)R,(int)G,(int)B);
      // UTF-8 encoding of U+2583
      // https://www.w3.org/TR/xml-entity-names/025.html
      // https://onlineunicodetools.com/convert-unicode-to-utf8
      printf("\xE2\x96\x83");
   } else {
      // both pixels identical: a space on a colored background is enough
      printf("\033[48;2;%d;%d;%dm ",(int)R,(int)G,(int)B);
   }
   if(last_column) {
      printf("\033[38;2;0;0;0m");
      printf("\033[48;2;0;0;0m\n");
   }
#else
   printf("\033[48;2;%d;%d;%dm ",(int)R,(int)G,(int)B);
   if(last_column) {
      printf("\033[48;2;0;0;0m\n");
   }
#endif
}
// Begins statistics collection for the current frame.
// Leave empty if not needed.
// (Explicit 'void' return type: implicit int is not valid C since C99.)
static inline void stats_begin_frame(void) {
}
// Begins statistics collection for the current pixel.
// Leave empty if not needed.
// There are these two levels (frame and pixel) because on some
// femtorv32 cores (quark, tachyon), the clock tick counter does not
// have sufficient bits and will wrap during the time taken by
// rendering a frame (up to several minutes).
// (Explicit 'void' return type: implicit int is not valid C since C99.)
static inline void stats_begin_pixel(void) {
}
// Ends statistics collection for the current pixel.
// Leave empty if not needed.
// (Explicit 'void' return type: implicit int is not valid C since C99.)
static inline void stats_end_pixel(void) {
}
// Ends statistics collection for the current frame
// and displays the result.
// Leave empty if not needed.
// (Explicit 'void' return type: implicit int is not valid C since C99.)
static inline void stats_end_frame(void) {
}
// Normally you will not need to modify anything beyond that point.
/*******************************************************************/
// Three-component and four-component single-precision vectors, passed and
// returned by value throughout this file.
typedef struct { float x,y,z; } vec3;
typedef struct { float x,y,z,w; } vec4;
// Builds a vec3 from its three components.
static inline vec3 make_vec3(float x, float y, float z) {
   vec3 result = { x, y, z };
   return result;
}
// Builds a vec4 from its four components.
static inline vec4 make_vec4(float x, float y, float z, float w) {
   vec4 result = { x, y, z, w };
   return result;
}
// Component-wise negation: returns -V.
static inline vec3 vec3_neg(vec3 V) {
   vec3 result = { -V.x, -V.y, -V.z };
   return result;
}
// Component-wise sum: returns U + V.
static inline vec3 vec3_add(vec3 U, vec3 V) {
   vec3 result = { U.x+V.x, U.y+V.y, U.z+V.z };
   return result;
}
// Component-wise difference: returns U - V.
static inline vec3 vec3_sub(vec3 U, vec3 V) {
   vec3 result = { U.x-V.x, U.y-V.y, U.z-V.z };
   return result;
}
// Dot product of U and V, accumulated in x, y, z order.
static inline float vec3_dot(vec3 U, vec3 V) {
   const float px = U.x*V.x;
   const float py = U.y*V.y;
   const float pz = U.z*V.z;
   return px+py+pz;
}
// Returns U with every component multiplied by the scalar s.
static inline vec3 vec3_scale(float s, vec3 U) {
   vec3 result = { s*U.x, s*U.y, s*U.z };
   return result;
}
// Euclidean length of U: square root of U dotted with itself.
static inline float vec3_length(vec3 U) {
   return sqrtf(vec3_dot(U,U));
}
// Returns U scaled by the reciprocal of its length.
// No special handling of a zero-length input.
static inline vec3 vec3_normalize(vec3 U) {
   const float inv_length = 1.0f/vec3_length(U);
   return vec3_scale(inv_length,U);
}
/*************************************************************************/
// A point light source.
typedef struct Light {
vec3 position; // location of the light
float intensity; // multiplies the diffuse and specular terms in cast_ray()
} Light;
// Builds a Light from its position and intensity.
Light make_Light(vec3 position, float intensity) {
   Light result = { position, intensity };
   return result;
}
/*************************************************************************/
// Surface properties used by cast_ray() to shade a hit point.
typedef struct {
float refractive_index; // passed to refract() as eta_t
// Weights of the four shading terms summed in cast_ray():
// x: diffuse, y: specular, z: reflected color, w: refracted color
vec4 albedo;
vec3 diffuse_color; // base color, scaled by the diffuse term
float specular_exponent; // exponent given to powf() for the specular highlight
} Material;
// Builds a Material.
//   r     refractive index
//   a     albedo weights
//   color diffuse color
//   spec  specular exponent
Material make_Material(float r, vec4 a, vec3 color, float spec) {
   Material result = { r, a, color, spec };
   return result;
}
// Default material: refractive index 1, albedo (1,0,0,0),
// black diffuse color, specular exponent 0.
Material make_Material_default() {
   return make_Material(1, make_vec4(1,0,0,0), make_vec3(0,0,0), 0);
}
/*************************************************************************/
// A sphere in the scene, together with the material it is made of.
typedef struct {
vec3 center;
float radius;
Material material;
} Sphere;
// Builds a Sphere from its center c, radius r and material M.
Sphere make_Sphere(vec3 c, float r, Material M) {
   Sphere result = { c, r, M };
   return result;
}
// Tests the ray (orig, dir) against sphere S.
// Returns 1 on a hit and stores in *t0 the ray parameter of the first
// intersection that is not behind the origin; returns 0 otherwise.
// *t0 is left untouched when the ray misses the sphere entirely, but it is
// written (with a negative value) when both intersections are behind the origin.
BOOL Sphere_ray_intersect(Sphere* S, vec3 orig, vec3 dir, float* t0) {
   const vec3  to_center = vec3_sub(S->center, orig);
   const float tca = vec3_dot(to_center,dir);               // projection on the ray
   const float d2  = vec3_dot(to_center,to_center) - tca*tca; // squared ray-center distance
   const float r2  = S->radius*S->radius;
   if (d2 > r2) {
      return 0;
   }
   const float thc   = sqrtf(r2 - d2);
   const float t_in  = tca - thc;
   const float t_out = tca + thc;
   // take the far intersection when the near one is behind the origin
   *t0 = (t_in < 0) ? t_out : t_in;
   return (*t0 < 0) ? 0 : 1;
}
// Mirror reflection: returns I - 2*(I.N)*N.
vec3 reflect(vec3 I, vec3 N) {
   const float twice_projection = 2.f*vec3_dot(I,N);
   return vec3_sub(I, vec3_scale(twice_projection,N));
}
// Refraction direction following Snell's law.
//   I      incident direction
//   N      surface normal
//   eta_t  refractive index on the far side of the surface
//   eta_i  refractive index on the near side (callers pass 1 for air)
// When the ray arrives from inside the object (negative cosine), the function
// calls itself with the normal flipped and the two indices swapped.
// When k<0 (total internal reflection) the vector (1,0,0) is returned; this
// has no physical meaning.
vec3 refract(vec3 I, vec3 N, float eta_t, float eta_i /* =1.f */) {
   const float cosi = -max(-1.f, min(1.f, vec3_dot(I,N)));
   if (cosi<0) {
      return refract(I, vec3_neg(N), eta_i, eta_t);
   }
   const float eta = eta_i / eta_t;
   const float k = 1 - eta*eta*(1 - cosi*cosi);
   if (k<0) {
      return make_vec3(1,0,0);
   }
   const vec3 along_I = vec3_scale(eta,I);
   const vec3 along_N = vec3_scale((eta*cosi - sqrtf(k)),N);
   return vec3_add(along_I,along_N);
}
// Finds the closest intersection of the ray (orig, dir) with the scene:
// the spheres, plus a checkerboard lying in the plane y = -4 and limited to
// |x| < 10 and -30 < z < -10.
// On a sphere hit, *hit, *N and the whole *material are overwritten.
// On a checkerboard hit, *hit and *N are overwritten but only
// material->diffuse_color is changed; the other material fields keep
// whatever they held before.
// Returns non-zero when the closest hit is nearer than 1000.
BOOL scene_intersect(
   vec3 orig, vec3 dir, Sphere* spheres, int nb_spheres,
   vec3* hit, vec3* N, Material* material
) {
   float nearest_sphere = 1e30;
   for(int s=0; s<nb_spheres; ++s) {
      Sphere* candidate = &spheres[s];
      float t;
      if(!Sphere_ray_intersect(candidate, orig, dir, &t)) {
         continue;
      }
      if(!(t < nearest_sphere)) {
         continue;
      }
      nearest_sphere = t;
      *hit = vec3_add(orig,vec3_scale(t,dir));
      *N = vec3_normalize(vec3_sub(*hit, candidate->center));
      *material = candidate->material;
   }

   float nearest_board = 1e30;
   if (fabs(dir.y)>1e-3) {
      // the checkerboard plane has equation y = -4
      float d = -(orig.y+4)/dir.y;
      vec3 pt = vec3_add(orig, vec3_scale(d,dir));
      if (d>0 && fabs(pt.x)<10 && pt.z<-10 && pt.z>-30 && d<nearest_sphere) {
         nearest_board = d;
         *hit = pt;
         *N = make_vec3(0,1,0);
         // alternate the two tile colors according to the parity of the tile
         const int odd_tile = ((int)(.5*hit->x+1000) + (int)(.5*hit->z)) & 1;
         if (odd_tile) {
            material->diffuse_color = make_vec3(.3, .3, .3);
         } else {
            material->diffuse_color = make_vec3(.3, .2, .1);
         }
      }
   }
   return min(nearest_sphere, nearest_board)<1000;
}
// Computes the color seen along the ray (orig, dir).
//   spheres, nb_spheres  scene geometry, forwarded to scene_intersect()
//   lights, nb_lights    light sources
//   depth                recursion level; top-level callers pass 0
// Returns the background color when nothing is hit or when depth exceeds 2.
// Otherwise returns the sum of diffuse, specular, reflected and refracted
// contributions, each weighted by one component of the material's albedo.
// The function calls itself twice per hit (reflection and refraction),
// regardless of the albedo weights.
vec3 cast_ray(
vec3 orig, vec3 dir, Sphere* spheres, int nb_spheres,
Light* lights, int nb_lights, int depth /* =0 */
) {
vec3 point,N;
Material material = make_Material_default();
if (
depth>2 ||
!scene_intersect(orig, dir, spheres, nb_spheres, &point, &N, &material)
) {
// background: both constant colors are scaled by the same factor s,
// which depends only on the vertical component of the direction
float s = 0.5*(dir.y + 1.0);
return vec3_add(
vec3_scale(s,make_vec3(0.2, 0.7, 0.8)),
vec3_scale(s,make_vec3(0.0, 0.0, 0.5))
);
}
vec3 reflect_dir=vec3_normalize(reflect(dir, N));
vec3 refract_dir=vec3_normalize(refract(dir,N,material.refractive_index,1));
// offset the original point to avoid occlusion by the object itself
// (1e-3 along N, on the side the secondary ray leaves from)
vec3 reflect_orig =
vec3_dot(reflect_dir,N) < 0
? vec3_sub(point,vec3_scale(1e-3,N))
: vec3_add(point,vec3_scale(1e-3,N));
vec3 refract_orig =
vec3_dot(refract_dir,N) < 0
? vec3_sub(point,vec3_scale(1e-3,N))
: vec3_add(point,vec3_scale(1e-3,N));
// secondary rays, one recursion level deeper
vec3 reflect_color = cast_ray(
reflect_orig, reflect_dir, spheres, nb_spheres,
lights, nb_lights, depth + 1
);
vec3 refract_color = cast_ray(
refract_orig, refract_dir, spheres, nb_spheres,
lights, nb_lights, depth + 1
);
// accumulate the contribution of every light that is not occluded
float diffuse_light_intensity = 0, specular_light_intensity = 0;
for (int i=0; i<nb_lights; i++) {
vec3 light_dir = vec3_normalize(vec3_sub(lights[i].position,point));
float light_distance = vec3_length(vec3_sub(lights[i].position,point));
vec3 shadow_orig =
vec3_dot(light_dir,N) < 0
? vec3_sub(point,vec3_scale(1e-3,N))
: vec3_add(point,vec3_scale(1e-3,N)) ;
// checking if the point lies in the shadow of the lights[i]
// (something is hit between the point and the light: skip this light)
vec3 shadow_pt, shadow_N;
// only written by scene_intersect(), never read here
Material tmpmaterial;
if (
scene_intersect(
shadow_orig, light_dir, spheres, nb_spheres,
&shadow_pt, &shadow_N, &tmpmaterial
) && (
vec3_length(vec3_sub(shadow_pt,shadow_orig)) < light_distance
)
) continue ;
diffuse_light_intensity +=
lights[i].intensity * max(0.f, vec3_dot(light_dir,N));
// abc: clamped cosine between the mirrored light direction and the ray
// def: specular exponent; powf() is only called when both are positive
float abc = max(
0.f, vec3_dot(vec3_neg(reflect(vec3_neg(light_dir), N)),dir)
);
float def = material.specular_exponent;
if(abc > 0.0f && def > 0.0f) {
specular_light_intensity += powf(abc,def)*lights[i].intensity;
}
}
// weighted sum of the four terms: albedo.x diffuse, albedo.y specular
// (white), albedo.z reflection, albedo.w refraction
vec3 result = vec3_scale(
diffuse_light_intensity * material.albedo.x, material.diffuse_color
);
result = vec3_add(
result, vec3_scale(specular_light_intensity * material.albedo.y,
make_vec3(1,1,1))
);
result = vec3_add(result, vec3_scale(material.albedo.z, reflect_color));
result = vec3_add(result, vec3_scale(material.albedo.w, refract_color));
return result;
}
// Renders pixel (i,j): shoots one primary ray from the origin through the
// center of the pixel and hands the resulting color to graphics_set_pixel().
// The field of view is pi/3; the vertical axis is flipped so that j grows
// downwards on screen.
static inline void render_pixel(
   int i, int j, Sphere* spheres, int nb_spheres, Light* lights, int nb_lights
) {
   const float fov = M_PI/3.;
   stats_begin_pixel();
   float ray_x =  (i + 0.5) - graphics_width/2.;
   float ray_y = -(j + 0.5) + graphics_height/2.; // this flips the image.
   float ray_z = -graphics_height/(2.*tan(fov/2.));
   vec3 eye = make_vec3(0,0,0);
   vec3 ray = vec3_normalize(make_vec3(ray_x, ray_y, ray_z));
   vec3 color = cast_ray(eye, ray, spheres, nb_spheres, lights, nb_lights, 0);
   graphics_set_pixel(i,j,color.x,color.y,color.z);
   stats_end_pixel();
}
// Renders one complete frame of graphics_width x graphics_height pixels.
// With graphics_double_lines defined, rows are visited two at a time and the
// two pixels of a column are rendered back to back (upper row first), which
// is the order graphics_set_pixel() expects in that mode.
void render(Sphere* spheres, int nb_spheres, Light* lights, int nb_lights) {
   stats_begin_frame();
   graphics_init();
#ifdef graphics_double_lines
   for (int row = 0; row<graphics_height; row+=2) {
      for (int col = 0; col<graphics_width; col++) {
         render_pixel(col,row  ,spheres,nb_spheres,lights,nb_lights);
         render_pixel(col,row+1,spheres,nb_spheres,lights,nb_lights);
      }
   }
#else
   for (int row = 0; row<graphics_height; row++) {
      for (int col = 0; col<graphics_width; col++) {
         render_pixel(col,row,spheres,nb_spheres,lights,nb_lights);
      }
   }
#endif
   graphics_terminate();
   stats_end_frame();
}
int nb_spheres = 4;
Sphere spheres[4];
int nb_lights = 3;
Light lights[3];
// Fills the global spheres[] and lights[] arrays with the demo scene:
// four spheres (ivory, glass, red rubber, mirror) and three lights.
void init_scene() {
   // ivory
   spheres[0] = make_Sphere(
      make_vec3(-3, 0, -16), 2,
      make_Material(
         1.0, make_vec4(0.6, 0.3, 0.1, 0.0), make_vec3(0.4, 0.4, 0.3), 50.
      )
   );
   // glass
   spheres[1] = make_Sphere(
      make_vec3(-1.0, -1.5, -12), 2,
      make_Material(
         1.5, make_vec4(0.0, 0.5, 0.1, 0.8), make_vec3(0.6, 0.7, 0.8), 125.
      )
   );
   // red rubber
   spheres[2] = make_Sphere(
      make_vec3( 1.5, -0.5, -18), 3,
      make_Material(
         1.0, make_vec4(0.9, 0.1, 0.0, 0.0), make_vec3(0.3, 0.1, 0.1), 10.
      )
   );
   // mirror
   spheres[3] = make_Sphere(
      make_vec3( 7, 5, -18), 4,
      make_Material(
         1.0, make_vec4(0.0, 10.0, 0.8, 0.0), make_vec3(1.0, 1.0, 1.0), 142.
      )
   );

   lights[0] = make_Light(make_vec3(-20, 20, 20), 1.5);
   lights[1] = make_Light(make_vec3( 30, 50, -25), 1.8);
   lights[2] = make_Light(make_vec3( 30, 20, 30), 1.7);
}
// Entry point: builds the demo scene, renders a single frame, then returns 0.
int main() {
init_scene();
render(spheres, nb_spheres, lights, nb_lights);
return 0;
}

173
FIRMWARE/tty_graphics.h Normal file
View File

@@ -0,0 +1,173 @@
#ifndef TTY_GRAPHICS_H
#define TTY_GRAPHICS_H
#include <stdio.h>
#include <stdint.h>
/**
 * \brief Restores the default tty colors (white foreground, black background)
 * \details Call this once drawing is finished; otherwise subsequent text
 *  may be invisible or hard to read, depending on the colors that were
 *  last selected.
 */
static inline void tty_graphics_reset_colors() {
    // background: palette entry 16 (black), then foreground: palette entry 15 (white)
    printf("\033[48;5;16m\033[38;5;15m");
}
/**
 * \brief Moves the cursor position to the origin (top left).
 * \details Emits the ANSI "cursor position" sequence without parameters.
 */
static inline void tty_graphics_home() {
printf("\033[H");
}
/**
 * \brief Clears the terminal.
 * \details Emits the ANSI "erase in display" sequence with parameter 2
 *  (entire screen).
 */
static inline void tty_graphics_clear() {
printf("\033[2J");
}
/**
 * \brief Initializes "graphics mode".
 * \details Resets the default colors, moves the cursor to the top-left
 *  position, then clears the terminal (in that order).
 */
static inline void tty_graphics_init() {
tty_graphics_reset_colors();
tty_graphics_home();
tty_graphics_clear();
}
/**
 * \brief Terminates "graphics mode".
 * \details Restores default foreground and background colors. The screen
 *  content and the cursor position are left as they are.
 */
static inline void tty_graphics_terminate() {
tty_graphics_reset_colors();
}
/**
 * \brief Moves the cursor to a specific location.
 * \param[in] x column, sent as the second parameter of the sequence
 * \param[in] y row, sent as the first parameter of the sequence
 * \details The values are passed through unchanged to the ANSI "cursor
 *  position" sequence, which numbers rows and columns starting from 1.
 */
static inline void tty_graphics_gotoXY(int x, int y) {
printf("\033[%d;%dH",y,x);
}
/**
 * \brief Draws a "pixel" (a block) at the current
 *  cursor position and advances the current cursor
 *  position.
 * \param[in] r , g , b the color components, in [0,255]
 * \details Sets the 24-bit background color and prints one space. The
 *  background color stays selected afterwards.
 */
static inline void tty_graphics_draw_one_pixel(
uint8_t r, uint8_t g, uint8_t b
) {
printf("\033[48;2;%d;%d;%dm ",(int)r,(int)g,(int)b);
}
/**
 * \brief Draws two vertically stacked "pixels" in the character cell at the
 *  current cursor position and advances the cursor.
 * \param[in] r1 , g1 , b1 color of the first pixel (used as background)
 * \param[in] r2 , g2 , b2 color of the second pixel (used as foreground)
 * \details Characters are roughly twice as high as wide, so packing two
 *  pixels per character gives approximately square pixels. When both colors
 *  are equal, a single colored space is printed instead. Otherwise a block
 *  glyph is drawn in the second color over a background in the first color.
 *  NOTE(review): the bytes emitted encode U+2583 (lower three eighths block),
 *  not U+2584 (lower half block) -- confirm this is the intended glyph.
 */
static inline void tty_graphics_draw_two_pixels(
    uint8_t r1, uint8_t g1, uint8_t b1,
    uint8_t r2, uint8_t g2, uint8_t b2
) {
    const int different = (r2 != r1) || (g2 != g1) || (b2 != b1);
    if(!different) {
        tty_graphics_draw_one_pixel(r1,g1,b1);
        return;
    }
    printf("\033[48;2;%d;%d;%dm",(int)r1,(int)g1,(int)b1);
    printf("\033[38;2;%d;%d;%dm",(int)r2,(int)g2,(int)b2);
    // https://www.w3.org/TR/xml-entity-names/025.html
    // https://onlineunicodetools.com/convert-unicode-to-utf8
    // https://copypastecharacter.com/
    printf("\xE2\x96\x83");
}
/**
 * \brief Moves the cursor position to the next line.
 * \details Background and foreground colors are set to black
 *  (foreground first, then background) before the line feed is printed.
 */
static inline void tty_graphics_newline() {
printf("\033[38;2;0;0;0m");
printf("\033[48;2;0;0;0m\n");
}
typedef void (*tty_graphics_pixelfunc)(int x, int y, uint8_t* r, uint8_t* g, uint8_t* b);
typedef void (*tty_graphics_fpixelfunc)(int x, int y, float* r, float* g, float* b);
/**
 * \brief Draws an image by calling a user-specified function for each pixel.
 * \param[in] width , height dimension of the image in square pixels
 * \param[in] do_pixel the user function to be called for each pixel
 *  (a "shader"), that determines the (integer) components r,g,b of the
 *  pixel's color.
 * \details Uses half-character pixels: rows are processed in pairs, and for
 *  each column do_pixel is called for row j and then for row j+1. With an odd
 *  height, do_pixel is therefore also called once per column with y == height.
 */
static inline void tty_graphics_scan(int width, int height, tty_graphics_pixelfunc do_pixel) {
    uint8_t top_r, top_g, top_b;
    uint8_t bot_r, bot_g, bot_b;
    tty_graphics_home();
    for (int row = 0; row<height; row+=2) {
        for (int col = 0; col<width; col++) {
            do_pixel(col,row  , &top_r, &top_g, &top_b);
            do_pixel(col,row+1, &bot_r, &bot_g, &bot_b);
            tty_graphics_draw_two_pixels(top_r,top_g,top_b,bot_r,bot_g,bot_b);
        }
        // end of a non-empty text row
        if(width > 0) {
            tty_graphics_newline();
        }
    }
}
/**
 * \brief Converts a floating point value to a byte.
 * \param[in] f the floating point value, nominally in [0,1]
 * \return the byte, in [0,255]
 * \details The input value is clamped to [0,1] and the scaled result is
 *  truncated towards zero. NaN is mapped to 0, so that the conversion to
 *  uint8_t is never applied to a value it cannot represent.
 */
static inline uint8_t tty_graphics_ftoi(float f) {
    // negated test on purpose: true for f <= 0 and also for NaN
    if(!(f > 0.0f)) {
        return 0;
    }
    if(f >= 1.0f) {
        return 255;
    }
    return (uint8_t)(255.0f * f);
}
/**
 * \brief Draws an image by calling a user-specified function for each pixel.
 * \param[in] width , height dimension of the image in square pixels
 * \param[in] do_pixel the user function to be called for each pixel
 *  (a "shader"), that determines the (floating-point) components fr,fg,fb
 *  of the pixel's color.
 * \details Uses half-character pixels: rows are processed in pairs, and for
 *  each column do_pixel is called for row j and then for row j+1. Each
 *  component is converted with tty_graphics_ftoi(). With an odd height,
 *  do_pixel is also called once per column with y == height.
 */
static inline void tty_graphics_fscan(int width, int height, tty_graphics_fpixelfunc do_pixel) {
    float top_fr, top_fg, top_fb;
    float bot_fr, bot_fg, bot_fb;
    uint8_t top_r, top_g, top_b;
    uint8_t bot_r, bot_g, bot_b;
    tty_graphics_home();
    for (int row = 0; row<height; row+=2) {
        for (int col = 0; col<width; col++) {
            // first pixel of the pair
            do_pixel(col,row  , &top_fr, &top_fg, &top_fb);
            top_r = tty_graphics_ftoi(top_fr);
            top_g = tty_graphics_ftoi(top_fg);
            top_b = tty_graphics_ftoi(top_fb);
            // second pixel of the pair
            do_pixel(col,row+1, &bot_fr, &bot_fg, &bot_fb);
            bot_r = tty_graphics_ftoi(bot_fr);
            bot_g = tty_graphics_ftoi(bot_fg);
            bot_b = tty_graphics_ftoi(bot_fb);
            tty_graphics_draw_two_pixels(top_r,top_g,top_b,bot_r,bot_g,bot_b);
        }
        // end of a non-empty text row
        if(width > 0) {
            tty_graphics_newline();
        }
    }
}
#endif

View File

@@ -0,0 +1,38 @@
#include "tty_graphics.h"
#include <math.h>
#ifdef __linux__
#include <stdlib.h>
#include <unistd.h>
#endif
// Size of the screen
// Replace with your own variables or values
#define graphics_width 80
#define graphics_height 40
int frame = 0;
float f = 0.0;
// Pixel "shader" for the animation: each channel is a sine wave mapped from
// [-1,1] to [0,1]. Red varies with the column, green with the row, blue with
// their sum; the global f shifts the three phases at different rates.
void do_pixel(int i, int j, float* R, float* G, float* B) {
   const float x = (float)i;
   const float y = (float)j;
   const double phase_r = x*0.1+f;
   const double phase_g = y*0.1+2.0*f;
   const double phase_b = (x+y)*0.05-3.0*f;
   *R = 0.5f*(sin(phase_r)+1.0);
   *G = 0.5f*(sin(phase_g)+1.0);
   *B = 0.5f*(sin(phase_b)+1.0);
}
// Animation loop: draws a frame, advances the animation parameter f by 0.1
// and the frame counter by one, then prints the frame number in the default
// colors. On Linux it additionally sleeps 40000 microseconds per frame.
// The loop never terminates.
int main() {
   tty_graphics_init();
   while(1) {
      tty_graphics_fscan(graphics_width, graphics_height, do_pixel);
      ++frame;
      f += 0.1;
      tty_graphics_reset_colors();
      printf("frame = %d\n",frame);
#ifdef __linux__
      usleep(40000);
#endif
   }
   return 0;
}

11
FIRMWARE/wait.S Normal file
View File

@@ -0,0 +1,11 @@
# wait: busy-wait delay loop.
# Counts t0 down from 1<<17 (131072) to zero, then returns.
# Takes no arguments and returns no value. Clobbers: t0.
.section .text
.globl wait
wait:
# t0 = 1 << 17 = number of loop iterations
li t0,1
slli t0, t0,17
.L0:
addi t0,t0,-1
# keep looping until the counter reaches zero
bnez t0, .L0
ret

3694
LESSON1.md Normal file

File diff suppressed because it is too large Load Diff

8
README.md Normal file
View File

@@ -0,0 +1,8 @@
## Toolchain
- Yosys / Yosys NextPNR / Yosys Apicula
## ToDo
- Check the Yosys documentation!
- TOBB labs
- Anki cards

24
step1.v Normal file
View File

@@ -0,0 +1,24 @@
/**
* Step 1: Blinker
* DONE
*/
`default_nettype none
// Step 1 top level: a free-running counter clocked directly by clk drives
// the LEDs. rst_i and RXD are declared but not used in this step.
module SOC (
input clk, // system clock
input rst_i, // reset button
output [3:0] led, // system LEDs
input RXD, // UART receive
output TXD // UART transmit
);
// A blinker that counts on 4 bits, wired to the 4 LEDs
reg [3:0] count = 0;
always @(posedge clk) begin
count <= count + 1;
end
assign led = count;
assign TXD = 1'b0; // not used for now
endmodule

40
step2.v Normal file
View File

@@ -0,0 +1,40 @@
/**
* Step 2: Blinker (slower version)
* DONE*
*/
`default_nettype none
`include "clockworks.v"
// Step 2 top level: same blinker as before, but clocked by the slowed-down
// clock produced by Clockworks, and cleared while resetn is low.
// RXD is declared but not used in this step.
module SOC (
input clk, // system clock
input rst_i, // reset button
output [4:0] led, // system LEDs
input RXD, // UART receive
output TXD // UART transmit
);
wire clkI; // internal clock
wire resetn; // internal reset signal, goes low on reset
// A blinker that counts on 5 bits, wired to the 5 LEDs
reg [4:0] count = 0;
always @(posedge clkI) begin
// synchronous clear while resetn is low, otherwise count up
count <= !resetn ? 0 : count + 1;
end
// Clock gearbox (to let you see what happens)
// and reset circuitry (to workaround an
// initialization problem with Ice40)
Clockworks #(
.SLOW(21) // Divide clock frequency by 2^21
)CW(
.CLK(clk),
.RESET(rst_i),
.clk(clkI),
.resetn(resetn)
);
assign led = count;
assign TXD = 1'b0; // not used for now
endmodule

65
step3.v Normal file
View File

@@ -0,0 +1,65 @@
/**
* Step 3: Display a led pattern "animation" stored in BRAM.
* DONE*
*/
`default_nettype none
`include "clockworks.v"
// Step 3 top level: plays a 21-entry LED pattern stored in MEM.
// On every rising edge of the slowed-down clock, the entry addressed by PC
// is copied to the LEDs and PC advances; PC returns to 0 after entry 20 or
// while resetn is low. RXD is declared but not used in this step.
module SOC (
input clk, // system clock
input rst_i, // reset button
output [4:0] led, // system LEDs
input RXD, // UART receive
output TXD // UART transmit
);
wire clkI; // internal clock
wire resetn; // internal reset signal, goes low on reset
reg [4:0] PC = 0; // index of the next pattern entry to display
reg [4:0] MEM [0:20]; // the pattern: 21 words of 5 bits, one bit per LED
initial begin
MEM[0] = 5'b00000;
MEM[1] = 5'b00001;
MEM[2] = 5'b00010;
MEM[3] = 5'b00100;
MEM[4] = 5'b01000;
MEM[5] = 5'b10000;
MEM[6] = 5'b10001;
MEM[7] = 5'b10010;
MEM[8] = 5'b10100;
MEM[9] = 5'b11000;
MEM[10] = 5'b11001;
MEM[11] = 5'b11010;
MEM[12] = 5'b11100;
MEM[13] = 5'b11101;
MEM[14] = 5'b11110;
MEM[15] = 5'b11111;
MEM[16] = 5'b11110;
MEM[17] = 5'b11100;
MEM[18] = 5'b11000;
MEM[19] = 5'b10000;
MEM[20] = 5'b00000;
end
reg [4:0] leds = 0; // registered copy of the current pattern entry
assign led=leds;
always @(posedge clkI) begin
leds <= MEM[PC];
// wrap after the last entry; also restart from 0 during reset
PC <= (!resetn || PC==20) ? 0 : (PC+1);
end
// Gearbox and reset circuitry.
Clockworks #(
// NOTE(review): this comment used to say 2^21 while SLOW is 25;
// presumably the clock is divided by 2^25 -- confirm against clockworks.v
.SLOW(25)
)CW(
.CLK(clk),
.RESET(rst_i),
.clk(clkI),
.resetn(resetn)
);
assign TXD = 1'b0; // not used for now
endmodule

59
step3K.v Normal file
View File

@@ -0,0 +1,59 @@
`include "clockworks.v"
// LED pattern player: on every rising edge of the slowed-down clock, the
// MEM entry addressed by PC is copied to the LEDs and PC advances; PC
// returns to 0 after entry 20 or while resetn is low.
// RXD is declared but not used; TXD is tied low.
module SOC (
input clk,
input rst_i,
output [4:0] led,
output TXD,
input RXD
);
// clkI: clock produced by Clockworks; resetn: reset produced by Clockworks,
// treated as active-low by the logic below
wire clkI, resetn;
reg [4:0] PC = 0; // index of the next pattern entry to display
reg [4:0] MEM [0:20]; // the pattern: 21 words of 5 bits, one bit per LED
initial begin
MEM[0] = 5'b00000;
MEM[1] = 5'b00001;
MEM[2] = 5'b00010;
MEM[3] = 5'b00100;
MEM[4] = 5'b01000;
MEM[5] = 5'b10000;
MEM[6] = 5'b10001;
MEM[7] = 5'b10010;
MEM[8] = 5'b10100;
MEM[9] = 5'b11000;
MEM[10] = 5'b11001;
MEM[11] = 5'b11010;
MEM[12] = 5'b11100;
MEM[13] = 5'b11101;
MEM[14] = 5'b11110;
MEM[15] = 5'b11111;
MEM[16] = 5'b11110;
MEM[17] = 5'b11100;
MEM[18] = 5'b11000;
MEM[19] = 5'b10000;
MEM[20] = 5'b00000;
end
reg [4:0] leds = 0; // registered copy of the current pattern entry
assign led = leds;
always @(posedge clkI) begin
leds <= MEM[PC];
// wrap after the last entry; also restart from 0 during reset
PC <= (!resetn || PC == 20) ? 0 : (PC + 1);
end
// Clock gearbox and reset circuitry
// (presumably divides the clock by 2^SLOW -- confirm against clockworks.v)
Clockworks #(
.SLOW(21)
)clkw(
.CLK(clk),
.RESET(rst_i),
.clk(clkI),
.resetn(resetn)
);
assign TXD = 1'b0;
endmodule