newStep.v

This commit is contained in:
2025-11-27 04:28:54 +03:00
parent a84b8fcfde
commit 6e38a6c1af
85 changed files with 25646 additions and 6801 deletions

489
RTL/DEVICES/FGA.v Normal file
View File

@@ -0,0 +1,489 @@
// femtorv32, a minimalistic RISC-V RV32I core
// Bruno Levy, 2020-2021
//
// This file: FGA: Femto Graphics Adapter
// Note: VRAM is write-only ! (the read port is used by HDMI)
//
// sel_cntl / io_wstrb / io_rstrb gives access to the set of control
// registers and commands:
//
// Write: set register: value[31:8] REG_XXX[7:0]
// command (1 arg): arg24[31:8] 1[7] CMD_XXX[6:0]
// command (2 args): arg12_2[31:20] arg12_1[19:8] 1[7] CMD_XXX[6:0]
//
// Read: the value of the register indicated by REG_READREGID
//
// Registers:
// REG_STATUS (0): vblank[31] hblank[30] drawarea[29] membusy[28] XXXX[27:24] X[23:12] Y[11:0]
// RESOLUTION (1): height[23:12] width[11:0]
// COLORMODE (2): colormapped[3] bpp[2:0] (0:1bpp 1:2bpp 2:4bpp 3:8bpp 4:16bpp)
// DISPLAYMODE (3): magnify[0]
// ORIGIN (4): origin_pixel_address[23:0] (first scanline starts at this pixel address)
// WRAP (5): wrap_pixel_address[23:0] (restart at pixel address 0 when reached)
// READREGID (6): mapped_regid[2:0] (the register mapped for read access)
//
// Commands:
// SET_PALETTE_R (1) arg12_1: cmap entry arg12_2: R
// SET_PALETTE_G (2) arg12_1: cmap entry arg12_2: G
// SET_PALETTE_B (3) arg12_1: cmap entry arg12_2: B
// SET_WWINDOW_X (4) arg12_1: x1 arg12_2: x2
// SET_WWINDOW_Y (5) arg12_1: y1 arg12_2: y2
// FILLRECT (6) arg24: color
//
// The window [x1-x2] [y1-y2] can be used in two different ways:
// - FILLRECT fills it with the specified color. Operation is
// complete when membusy goes low in REG_STATUS.
// - individual pixel values can be specified one by one by
// writing to the DAT mapped IO (io_wstrb + sel_dat), pixel
// address is incremented automatically.
// This allows emulation of SSD1331/SSD1351 "window write"
// command in the three modes for OLED-HDMI mirroring
//
// See FIRMWARE/LIBFEMTOGL/FGA.h, FGA.c and FGA_mode.c
// "Physical mode" sent to the HDMI (choose one of them)
// Note: > 640x480 may make timings fail
//`define MODE_640x480
`define MODE_800x600
//`define MODE_1024x768
//`define MODE_1280x1024
`include "GFX_hdmi.v"
// FGA top level. Two clock domains are visible in this module: clk (control
// registers, window sequencer, VRAM and palette writes) and pixel_clk (scan-out
// pipeline). pixel_clk is produced from pclk by the GFX_PLL instance below.
module FGA(
input wire pclk, // board clock (input of the graphics PLL)
input wire clk, // system clock
input wire sel, // if zero, direct VRAM writes are ignored
input wire [3:0] mem_wmask, // mem write mask and strobe (one bit per byte lane)
input wire [16:0] mem_address, // address in graphic memory (128K), word-aligned
input wire [31:0] mem_wdata, // data to be written (also carries control words and window pixel data)
output wire [3:0] gpdi_dp, // HDMI signals, blue, green, red, clock
// gpdi_dn generated by pins (see ulx3s.lpf)
input wire io_wstrb, // IO write strobe (qualifies sel_cntl / sel_dat)
input wire io_rstrb, // IO read strobe (qualifies sel_cntl for rdata)
input wire sel_cntl, // IO: select control register (RW)
input wire sel_dat, // IO: select data input (W)
output wire [31:0] rdata // data read
);
// NOTE(review): the GFX_* timing constants used by the scan-out logic are
// presumably defined by this include, according to the MODE_* macro selected
// above - confirm against GFX_modes.v.
`include "GFX_modes.v"
wire pixel_clk; // pixel clock, driven by the gfx_pll instance
// Video memory: 32768 words of 32 bits (128 KB). Written in the clk domain,
// read by the pixel pipeline in the pixel_clk domain.
reg [31:0] VRAM[0:32767];
// Colormap: 256 entries of 24 bits, laid out as {R[23:16], G[15:8], B[7:0]}.
reg [23:0] PALETTE[0:255];
/************************* HDMI signal generation ***************************/
// Pixel-depth encodings held in mode_bpp (COLORMODE register, bits [2:0]).
localparam MODE_1bpp  = 3'd0,
           MODE_2bpp  = 3'd1,
           MODE_4bpp  = 3'd2,
           MODE_8bpp  = 3'd3,
           MODE_16bpp = 3'd4;

// Logical video mode, programmed through the control port.
reg [11:0] mode_width;               // logical width, in pixels
reg [11:0] mode_height;              // logical height, in pixels
reg [2:0]  mode_bpp;                 // one of the MODE_xbpp constants
reg        mode_colormapped;         // 1: pixel values index PALETTE
reg        mode_magnify;             // 1: pixels and scanlines are doubled
reg [23:0] mode_origin_pix_address;  // pixel address of the first scanline
reg [23:0] mode_wrap_pix_address;    // row start wraps to 0 past this address

// Scan state; this part is just like a VGA generator.
reg [11:0] X;          // current pixel column
reg [11:0] Y;          // current scanline
reg        hsync;      // horizontal synchronization
reg        vsync;      // vertical synchronization
reg        draw_area;  // asserted if current pixel is in drawing area
reg        mem_busy;   // asserted if memory transfer is running.

// Value of the control register currently mapped for reading. It is only
// presented on rdata while a control-port read is in progress, zero otherwise.
reg [31:0] read_reg;
assign rdata = read_reg & {32{io_rstrb && sel_cntl}};
// We are going to fetch data from video RAM (now stored in BRAM), and then,
// in colormapped modes, fetch colormap entry. Each fetch introduces some
// latency -> there is a small pixel pipeline. Each stage needs to have
// its own copy of all registers it needs (that is, copy pixel address
// between stage 1 and stage 2 to keep it in sync with pixel data).
//
// Stage 0 generates the X,Y coordinates and horizontal,vertical sync signals
// (standard in all VGA/DVI/HDMI drivers)
// Stage 1 generates the pixel address. The unit is in number of pixels.
// it handles pixel doubling/scanline doubling in 320x200 resolutions
// it also handles page flipping, with the ORIGIN register.
// Stage 2 fetches pixel data from RAM. It handles pixel address -> word address
// translation. It creates its own copy of pixel_address to keep it in
// sync with pixel data (1 clock latency)
// Stage 3 generates R,G,B either from colormap lookup (mode_colormapped set) or from
// 16 bit pixel data directly (mode_colormapped clear). If colormap lookup is used,
// it generates an additional cycle of latency.
//
// Note: the first two pixel columns are wrong due to latency (the image is
// shifted two pixels to the right, with garbage in the first two columns),
// normally we should start fetching from the previous scanline, at the end
// of hsync, 1 clock in advance in mode 0, and two clocks in advance in mode 1.
// I was too lazy to do that, so I just hide the first two columns !
// (so there are two columns missing on the right side of the image).
// I will do that properly when VRAM will be stored in SDRAM (then I'll have no
// choice, latency will probably be significantly larger than 2 pixels).
// Stage 0: scan counters and the sync / draw-area flags derived from them.
// X counts the pixels of a complete line (GFX_line_width of them) and Y the
// lines of a complete frame (GFX_lines of them), blanking included.
wire scan_end_of_line  = (X == GFX_line_width-1);
wire scan_end_of_frame = (Y == GFX_lines-1);
always @(posedge pixel_clk) begin
   if(scan_end_of_line) begin
      X <= 0;
      if(scan_end_of_frame) begin
         Y <= 0;
      end else begin
         Y <= Y+1;
      end
   end else begin
      X <= X+1;
   end

   // All three flags are registered from the current (pre-update) X and Y.
   // Sync pulses start after the front porch and last for the sync width.
   hsync     <= (X >= GFX_width+GFX_h_front_porch) &&
                (X <  GFX_width+GFX_h_front_porch+GFX_h_sync_width);
   vsync     <= (Y >= GFX_height+GFX_v_front_porch) &&
                (Y <  GFX_height+GFX_v_front_porch+GFX_v_sync_width);
   draw_area <= (X < GFX_width) && (Y < GFX_height);
end
// Stage 1: pixel address generation (unit: pixels, not bytes or words).
reg [23:0] pix_address;            // address of the pixel being scanned
reg [23:0] row_start_pix_address;  // address of the first pixel of the current row

// Start address of the following row: one logical row further, or back to
// pixel 0 once that would go past the programmed wrap address.
wire [23:0] row_stride_sum = row_start_pix_address + {12'b0, mode_width};
wire [23:0] next_row_start_pix_address =
   (row_stride_sum <= mode_wrap_pix_address) ? row_stride_sum : 24'd0;

// In magnify mode every logical pixel covers two columns and every logical
// row covers two scanlines, so addresses only advance on odd X / odd Y.
wire advance_column = X[0] || !mode_magnify;
wire advance_row    = Y[0] || !mode_magnify;

always @(posedge pixel_clk) begin
   if(X == 0) begin
      // Start of a scanline. Default: replay the current row (second
      // scanline of a doubled row); the cases below override it.
      pix_address <= row_start_pix_address;
      if(Y == 0) begin
         // Start of a frame: restart from the programmed origin.
         row_start_pix_address <= mode_origin_pix_address;
         pix_address           <= mode_origin_pix_address;
      end else if(advance_row) begin
         row_start_pix_address <= next_row_start_pix_address;
         pix_address           <= next_row_start_pix_address;
      end
   end else if(advance_column) begin
      pix_address <= pix_address + 1;
   end
end
// Stage 2: pixel data fetch.
// Pixel address -> 32-bit word address: a word holds 2, 4, 8, 16 or 32
// pixels depending on the depth, hence the address is divided accordingly.
reg [23:0] word_address;
always @(*) begin
   word_address = 24'd0; // value for any mode_bpp not listed below
   case(mode_bpp)
      MODE_1bpp:  word_address = {5'b0, pix_address[23:5]};
      MODE_2bpp:  word_address = {4'b0, pix_address[23:4]};
      MODE_4bpp:  word_address = {3'b0, pix_address[23:3]};
      MODE_8bpp:  word_address = {2'b0, pix_address[23:2]};
      MODE_16bpp: word_address = {1'b0, pix_address[23:1]};
      default: begin end
   endcase
end

// Registered VRAM read. The pixel address is delayed along with the data so
// that stage 3 can locate the pixel inside the fetched word.
reg [23:0] pix_address_2;
reg [31:0] pix_word_data_2;
wire [14:0] vram_fetch_index = word_address[14:0]; // TODO: only the 15 LSBs address the 32K-word VRAM
always @(posedge pixel_clk) begin
   pix_word_data_2 <= VRAM[vram_fetch_index];
   pix_address_2   <= pix_address;
end
// Stage 3: generate R,G,B from pixel data.
// Combinatorial extraction of the colormap index from the fetched word:
// the low bits of the pixel address select the pixel within the word, and
// the index is zero-extended to 8 bits. It is 0 in any mode without a case
// below (16bpp included).
reg [7:0] pix_color_index_3;
/* verilator lint_off WIDTH */
always @(*) begin
   pix_color_index_3 = 8'b0;
   case(mode_bpp)
      MODE_1bpp: pix_color_index_3[0]   = pix_word_data_2[pix_address_2[4:0]];
      MODE_2bpp: pix_color_index_3[1:0] = pix_word_data_2[{pix_address_2[3:0], 1'b0} +: 2];
      MODE_4bpp: pix_color_index_3[3:0] = pix_word_data_2[{pix_address_2[2:0], 2'b0} +: 4];
      MODE_8bpp: pix_color_index_3      = pix_word_data_2[{pix_address_2[1:0], 3'b0} +: 8];
      default: begin end
   endcase
end
/* verilator lint_on WIDTH */
// Extent of the displayed image in scan coordinates: the logical resolution,
// doubled (12-bit result, MSB dropped) when magnify is on. Registered on clk.
reg [11:0] maxX;
reg [11:0] maxY;
always @(posedge clk) begin
   maxX <= mode_magnify ? {mode_width[10:0],  1'b0} : mode_width;
   maxY <= mode_magnify ? {mode_height[10:0], 1'b0} : mode_height;
end
// Final colour registers sent to the HDMI encoder.
reg [7:0] R,G,B;

// Direct-colour pixel: bit 0 of the pixel address selects the upper or the
// lower 16-bit half of the fetched word.
reg [15:0] direct_pixel_3;
always @(*) begin
   if(pix_address_2[0]) begin
      direct_pixel_3 = pix_word_data_2[31:16];
   end else begin
      direct_pixel_3 = pix_word_data_2[15:0];
   end
end

// Positions that are forced to black: everything outside the displayed
// image, plus the first two columns (pixel pipeline latency is not
// compensated, so those columns would show garbage).
wire force_black = (X == 0) || (X == 1) || (X >= maxX) || (Y >= maxY);

always @(posedge pixel_clk) begin
   if(mode_colormapped) begin
      {R,G,B} <= PALETTE[pix_color_index_3];
   end else begin
      // 5-6-5 layout, expanded to 8 bits per channel by zero-padding the LSBs.
      R <= {direct_pixel_3[15:11], 3'b000};
      G <= {direct_pixel_3[10:5],  2'b00 };
      B <= {direct_pixel_3[4:0],   3'b000};
   end
   // Last assignment wins: blanking overrides the colour computed above.
   if(force_black) begin
      {R,G,B} <= 24'b0;
   end
end
// Video signal generation and HDMI

// Clock at 5x the pixel clock frequency, used by the serializers (DDR).
wire pixel_clk_x5;

// The graphic PLL: derives the pixel clock (and the freq*5 clock) from the
// board clock.
GFX_PLL gfx_pll(
   .pclk        (pclk),
   .pixel_clk   (pixel_clk),
   .pixel_clk_x5(pixel_clk_x5)
);

// The HDMI encoder: takes the colour, sync and draw-area signals produced by
// the pixel pipeline and drives the gpdi_dp outputs.
GFX_hdmi hdmi(
   .pixel_clk   (pixel_clk),
   .pixel_clk_x5(pixel_clk_x5),
   .R           (R),
   .G           (G),
   .B           (B),
   .hsync       (hsync),
   .vsync       (vsync),
   .draw_area   (draw_area),
   .gpdi_dp     (gpdi_dp)
);
/*************************************************************************/
// Decoding of a word written to the control port (io_wstrb && sel_cntl).
// Bit 7 distinguishes a command (1) from a register write (0).
wire is_command = mem_wdata[7];
// Command id / register id: only the three low bits are decoded here,
// bits [6:3] of the written word are not examined.
wire [2:0] command = mem_wdata[2:0];
wire [2:0] set_regid = mem_wdata[2:0];
// Arguments: either one 24-bit argument, or two 12-bit arguments occupying
// the same bits (arg12_1 is the low half [19:8], arg12_2 the high half [31:20]).
wire[23:0] arg24 = mem_wdata[31:8];
wire[11:0] arg12_1 = mem_wdata[19:8];
wire[11:0] arg12_2 = mem_wdata[31:20];
// Register ids, used both when writing a register and when selecting the
// register mapped for reading (REG_READREGID).
localparam REG_STATUS = 3'd0;
localparam REG_RESOLUTION = 3'd1;
localparam REG_COLORMODE = 3'd2;
localparam REG_DISPLAYMODE = 3'd3;
localparam REG_ORIGIN = 3'd4;
localparam REG_WRAP = 3'd5;
localparam REG_READREGID = 3'd6;
// Command ids (valid when is_command is set).
localparam CMD_SET_PALETTE_R = 3'd1;
localparam CMD_SET_PALETTE_G = 3'd2;
localparam CMD_SET_PALETTE_B = 3'd3;
localparam CMD_SET_WWINDOW_X = 3'd4;
localparam CMD_SET_WWINDOW_Y = 3'd5;
localparam CMD_FILLRECT = 3'd6;
// Windowed-pixel write and fillrect command.
//
// - write window command, two commands:
// (send 32 bits to IO_FGA_CNTL hardware register)
// SET_WWINDOW_X: X1 X2
// SET_WWINDOW_Y: Y1 Y2
//
// - write data: send 16 bits to IO_FGA_DAT hardware register
// MSB first, encoding follows SSD1351: RRRRR GGGGG 0 BBBBB
//
// Note that once the window is properly initialized, the write
// data command emulates the SSD1351 OLED display, then by writing
// to both FGA and SSD1351 control registers, one clones the output
// of the SSD1351 oled display to the HDMI screen for free !
//
// See in <femtorv32.h>:
// #define IO_GFX_DAT (IO_SSD1351_DAT16 | IO_FGA_DAT)
// #define OLED_WRITE_DATA_UINT16(RGB) IO_OUT(IO_GFX_DAT,(RGB))
// #define OLED_WRITE_DATA_RGB(R,G,B) OLED_WRITE_DATA_UINT16(GL_RGB(R,G,B))
//
// This also works when FGA is in paletted mode (320x200x8bpp, 640x400x4bpp)
// since the write data command properly interprets pixel addresses. The
// only requirement is to have a palette that will correctly map the 8 LSBs
// / 4 LSBs of pixel data to a color. In libfemtorv32, this maps 0 to black
// and any non-zero to white (this is how COMMANDER is displayed in 640x400
// on the HDMI screen).
//
// To generate pixel data, there are two other options:
// - directly writing to VRAM from FemtoRV32
// - FILLRECT (see below)
// Write-window state (clk domain):
//   window_x1..x2 / window_y1..y2 : inclusive bounds of the window
//   window_x / window_y           : position of the next pixel to be written
//   window_row_start              : pixel address of the first pixel of the current window row
//   window_pixel_address          : pixel address of the next pixel to be written
reg [11:0] window_x1, window_x2, window_y1, window_y2, window_x, window_y;
reg [23:0] window_row_start;
reg [23:0] window_pixel_address;
reg [15:0] fill_color; // colour latched by the FILLRECT command
reg fill_rect;         // asserted while a FILLRECT is running

// Data read from control register: depends on mapped register (read_regid)
reg [2:0] read_regid;
always @(posedge clk) begin
   case(read_regid)
      REG_RESOLUTION:  read_reg <= {8'b0, mode_height, mode_width};
      REG_COLORMODE:   read_reg <= {28'b0, mode_colormapped, mode_bpp};
      REG_DISPLAYMODE: read_reg <= {31'b0, mode_magnify};
      REG_ORIGIN:      read_reg <= {8'b0, mode_origin_pix_address};
      REG_WRAP:        read_reg <= {8'b0, mode_wrap_pix_address};
      REG_READREGID:   read_reg <= {29'b0, read_regid};
      // REG_STATUS (and any unassigned id):
      //   vblank[31] hblank[30] draw_area[29] mem_busy[28] 0[27:24] X[23:12] Y[11:0]
      // vblank / hblank are asserted when the beam is below / right of the
      // displayed image. The image extent (maxX, maxY) follows the programmed
      // resolution and magnify setting, instead of assuming a 640x400 image,
      // so the flags are also meaningful in the other video modes. For a
      // 640x400 image (or 320x200 magnified) the result is unchanged.
      // While no resolution has been programmed (extent 0) both flags read as 1.
      // NOTE(review): X, Y and draw_area come from the pixel_clk domain and
      // are sampled here on clk without synchronization - confirm that an
      // occasional inconsistent X/Y sample is acceptable to the firmware.
      default: read_reg <= {(Y >= maxY),(X >= maxX),draw_area,mem_busy,4'b0,X,Y};
   endcase
end
// Window sequencer and control-port decoder (clk domain).
//
// Sequencer: while a window is armed (mem_busy), each pixel written through
// the data port (io_wstrb && sel_dat) or each clock of a running FILLRECT
// advances the write position by one pixel, row by row inside the window.
// Writing the last pixel (x2,y2) clears mem_busy and fill_rect.
//
// Decoder: a write to the control port either executes a command (bit 7 set)
// or sets a mode register (bit 7 clear).
wire window_step = mem_busy && ((io_wstrb && sel_dat) || fill_rect);
wire window_last_pixel = window_step &&
                         (window_x == window_x2) && (window_y == window_y2);
always @(posedge clk) begin
   if(window_step) begin
      window_pixel_address <= window_pixel_address + 1;
      window_x <= window_x + 1;
      if(window_x == window_x2) begin
         if(window_y == window_y2) begin
            mem_busy <= 1'b0;
            fill_rect <= 1'b0;
         end else begin
            // End of a window row: go to the start of the next one, which
            // is one logical scanline (mode_width pixels) further in memory.
            window_y <= window_y+1;
            window_x <= window_x1;
            window_pixel_address <= window_row_start + {12'b0, mode_width};
            window_row_start <= window_row_start + {12'b0, mode_width};
         end
      end
   end
   if(io_wstrb && sel_cntl) begin
      if(is_command) begin
         case(command)
            // Palette commands: arg12_1 is the colormap entry, arg12_2 the
            // 8-bit component value.
            CMD_SET_PALETTE_B: PALETTE[arg12_1[7:0]][7:0  ] <= arg12_2[7:0];
            CMD_SET_PALETTE_G: PALETTE[arg12_1[7:0]][15:8 ] <= arg12_2[7:0];
            CMD_SET_PALETTE_R: PALETTE[arg12_1[7:0]][23:16] <= arg12_2[7:0];
            CMD_SET_WWINDOW_X: begin
               window_x1 <= arg12_1;
               window_x2 <= arg12_2;
               window_x <= arg12_1;
               mem_busy <= 1'b1;
            end
            // Must be sent after SET_WWINDOW_X: the start address is
            // computed from the window_x1 value already stored.
            CMD_SET_WWINDOW_Y: begin
               window_y1 <= arg12_1;
               window_y2 <= arg12_2;
               window_y <= arg12_1;
               mem_busy <= 1'b1;
               /* verilator lint_off WIDTH */
               window_row_start <= arg12_1 * mode_width + window_x1;
               window_pixel_address <= arg12_1 * mode_width + window_x1;
               /* verilator lint_on WIDTH */
            end
            CMD_FILLRECT: begin
               // A fill can only terminate through the sequencer above,
               // which runs while mem_busy is set. Starting one with no
               // window armed, or in the very cycle the window completes,
               // would leave fill_rect asserted forever (continuous VRAM
               // writes, direct VRAM writes locked out), so such a request
               // is ignored.
               if(mem_busy && !window_last_pixel) begin
                  fill_rect <= 1'b1;
                  fill_color <= arg24[15:0];
               end
            end
            default: begin end
         endcase
      end else begin
         case(set_regid)
            REG_RESOLUTION:  {mode_height, mode_width} <= arg24;
            REG_COLORMODE:   {mode_colormapped, mode_bpp} <= arg24[3:0];
            REG_DISPLAYMODE: mode_magnify <= arg24[0];
            REG_READREGID:   read_regid <= arg24[2:0];
            REG_ORIGIN:      mode_origin_pix_address <= arg24;
            REG_WRAP:        mode_wrap_pix_address <= arg24;
            default: begin end // REG_STATUS is read-only
         endcase
      end
   end
end
// Write to VRAM (FILLRECT and interface with processor)
// Word index for direct processor writes (byte address with the two LSBs dropped).
wire [14:0] vram_word_address = mem_address[16:2];
// Pixel value for window writes: the latched fill colour while a FILLRECT
// is running, otherwise the low 16 bits of the word written to the data port.
wire [15:0] pixel_color = fill_rect ? fill_color : mem_wdata[15:0];
// FILLRECT:
// The fillrect command repeatedly sends the same pixel data to the current
// window. It has two advantages as compared to do that by hand:
// - fills one pixel per clock (whereas in its fastest configuration,
// FemtoRV32 uses 6 clocks per loop iteration)
// - execution can continue, which lets FemtoRV prepare the next drawing
// operation. Before sending more data to FGA, FemtoRV needs to test
// the FGA_BUSY_bit in the control register, as follows:
// while(IO_IN(IO_FGA_CNTL) & FGA_BUSY_bit);
// This is used in LIBFEMTORV32/FGA.c, to implement hardware-accelerated
// polygon fill (using one FILLRECT call per polygon scanline).
//
// Single VRAM write port, clocked on clk. Window writes (FILLRECT or data
// port) have priority; a direct processor write is only performed when no
// window write happens in the same cycle and mem_busy is low.
// NOTE(review): the fill_rect term below is not qualified by mem_busy, unlike
// the data-port term. If fill_rect were ever asserted while mem_busy is low,
// this branch would write on every clock and direct writes would be locked out.
always @(posedge clk) begin
// FILLRECT or pixel data sent to the graphic data port
if(fill_rect || (io_wstrb && sel_dat && mem_busy)) begin
// The pixel address is split according to the pixel depth: the upper
// 15 bits of the split select the VRAM word, the lower bits select the
// pixel slot inside that word. Only the low bits of pixel_color that fit
// the depth are stored.
/* verilator lint_off CASEINCOMPLETE */
case(mode_bpp)
MODE_16bpp: begin
// two pixels per word
case(window_pixel_address[0])
1'b0: VRAM[window_pixel_address[15:1]][15:0 ] <= pixel_color;
1'b1: VRAM[window_pixel_address[15:1]][31:16] <= pixel_color;
endcase
end
MODE_8bpp: begin
// four pixels per word
case(window_pixel_address[1:0])
2'b00: VRAM[window_pixel_address[16:2]][ 7:0 ] <= pixel_color[7:0];
2'b01: VRAM[window_pixel_address[16:2]][15:8 ] <= pixel_color[7:0];
2'b10: VRAM[window_pixel_address[16:2]][23:16] <= pixel_color[7:0];
2'b11: VRAM[window_pixel_address[16:2]][31:24] <= pixel_color[7:0];
endcase
end
MODE_4bpp: begin
// eight pixels per word
case(window_pixel_address[2:0])
3'b000: VRAM[window_pixel_address[17:3]][ 3:0 ] <= pixel_color[3:0];
3'b001: VRAM[window_pixel_address[17:3]][ 7:4 ] <= pixel_color[3:0];
3'b010: VRAM[window_pixel_address[17:3]][11:8 ] <= pixel_color[3:0];
3'b011: VRAM[window_pixel_address[17:3]][15:12] <= pixel_color[3:0];
3'b100: VRAM[window_pixel_address[17:3]][19:16] <= pixel_color[3:0];
3'b101: VRAM[window_pixel_address[17:3]][23:20] <= pixel_color[3:0];
3'b110: VRAM[window_pixel_address[17:3]][27:24] <= pixel_color[3:0];
3'b111: VRAM[window_pixel_address[17:3]][31:28] <= pixel_color[3:0];
endcase
end
MODE_2bpp: begin
// sixteen pixels per word
case(window_pixel_address[3:0])
4'b0000: VRAM[window_pixel_address[18:4]][ 1:0 ] <= pixel_color[1:0];
4'b0001: VRAM[window_pixel_address[18:4]][ 3:2 ] <= pixel_color[1:0];
4'b0010: VRAM[window_pixel_address[18:4]][ 5:4 ] <= pixel_color[1:0];
4'b0011: VRAM[window_pixel_address[18:4]][ 7:6 ] <= pixel_color[1:0];
4'b0100: VRAM[window_pixel_address[18:4]][ 9:8 ] <= pixel_color[1:0];
4'b0101: VRAM[window_pixel_address[18:4]][11:10] <= pixel_color[1:0];
4'b0110: VRAM[window_pixel_address[18:4]][13:12] <= pixel_color[1:0];
4'b0111: VRAM[window_pixel_address[18:4]][15:14] <= pixel_color[1:0];
4'b1000: VRAM[window_pixel_address[18:4]][17:16] <= pixel_color[1:0];
4'b1001: VRAM[window_pixel_address[18:4]][19:18] <= pixel_color[1:0];
4'b1010: VRAM[window_pixel_address[18:4]][21:20] <= pixel_color[1:0];
4'b1011: VRAM[window_pixel_address[18:4]][23:22] <= pixel_color[1:0];
4'b1100: VRAM[window_pixel_address[18:4]][25:24] <= pixel_color[1:0];
4'b1101: VRAM[window_pixel_address[18:4]][27:26] <= pixel_color[1:0];
4'b1110: VRAM[window_pixel_address[18:4]][29:28] <= pixel_color[1:0];
4'b1111: VRAM[window_pixel_address[18:4]][31:30] <= pixel_color[1:0];
endcase
end
// Taken for MODE_1bpp and for any mode_bpp value not listed above:
// thirty-two pixels per word, one bit each.
default: begin // 1bpp
VRAM[window_pixel_address[19:5]][window_pixel_address[4:0]] <= pixel_color[0];
end
endcase
/* verilator lint_on CASEINCOMPLETE */
end else if(sel && !mem_busy) begin // Direct VRAM write from FemtoRV32
// Byte-lane write: each mem_wmask bit enables one byte of the word.
if(mem_wmask[0]) VRAM[vram_word_address][ 7:0 ] <= mem_wdata[ 7:0 ];
if(mem_wmask[1]) VRAM[vram_word_address][15:8 ] <= mem_wdata[15:8 ];
if(mem_wmask[2]) VRAM[vram_word_address][23:16] <= mem_wdata[23:16];
if(mem_wmask[3]) VRAM[vram_word_address][31:24] <= mem_wdata[31:24];
end
end
endmodule