// femtorv32, a minimalistic RISC-V RV32I core
//       Bruno Levy, 2020-2021
//
// This file: FGA: Femto Graphics Adapter
//   Note: VRAM is write-only ! (the read port is used by HDMI)
//
// sel_cntl / io_wstrb / io_rstrb give access to the set of control
// registers and commands:
//
//   Write: set register:     value[31:8]                    0[7] REG_XXX[2:0]
//          command (1 arg):  arg24[31:8]                    1[7] CMD_XXX[2:0]
//          command (2 args): arg12_2[31:20] arg12_1[19:8]   1[7] CMD_XXX[2:0]
//          (bit 7 selects register/command; only the 3 LSBs of the
//           register / command id are decoded, bits [6:3] are ignored)
//
//   Read: the value of the register indicated by REG_READREGID
//
//   Registers:
//     REG_STATUS  (0): vblank[31] hblank[30] drawarea[29] membusy[28] XXXX[27:24] X[23:12] Y[11:0]
//                      (vblank is asserted when Y >= 400, hblank when X >= 640,
//                       X and Y are the physical scan coordinates)
//     RESOLUTION  (1): height[23:12] width[11:0]
//     COLORMODE   (2): colormapped[3] bpp[2:0] (0:1bpp 1:2bpp 2:4bpp 3:8bpp 4:16bpp)
//     DISPLAYMODE (3): magnify[0]
//     ORIGIN      (4): origin_pixel_address[23:0] (first scanline starts at this pixel address)
//     WRAP        (5): wrap_pixel_address[23:0]   (restart at pixel address 0 when reached)
//     READREGID   (6): mapped_regid[2:0]          (the register mapped for read access)
//
//   Commands:
//     SET_PALETTE_R (1) arg12_1: cmap entry   arg12_2: R
//     SET_PALETTE_G (2) arg12_1: cmap entry   arg12_2: G
//     SET_PALETTE_B (3) arg12_1: cmap entry   arg12_2: B
//     SET_WWINDOW_X (4) arg12_1: x1           arg12_2: x2
//     SET_WWINDOW_Y (5) arg12_1: y1           arg12_2: y2
//     FILLRECT      (6) arg24: color (only the 16 LSBs are used)
//
//   SET_WWINDOW_X must be sent before SET_WWINDOW_Y: the start address of
//   the window is computed from x1 when SET_WWINDOW_Y is received.
//
//   The window [x1-x2] [y1-y2] can be used in two different ways:
//   - FILLRECT fills it with the specified color. Operation is
//     complete when membusy goes low in REG_STATUS.
//   - individual pixel values can be specified one by one by
//     writing to the DAT mapped IO (io_wstrb + sel_dat), pixel
//     address is incremented automatically.
//     This allows emulation of SSD1331/SSD1351 "window write"
//     command in the three modes for OLED-HDMI mirroring
//
// See FIRMWARE/LIBFEMTOGL/FGA.h, FGA.c and FGA_mode.c

// "Physical mode" sent to the HDMI (choose one of them)
// Note: > 640x480 may make timings fail
//`define MODE_640x480
`define MODE_800x600
//`define MODE_1024x768
//`define MODE_1280x1024

`include "GFX_hdmi.v"

// FGA: memory-mapped graphics adapter.
//
//  - clk domain:       control registers, palette writes, window-write /
//                      FILLRECT engine, direct VRAM writes from the CPU.
//  - pixel_clk domain: scan-out pipeline (coordinates -> pixel address ->
//                      VRAM fetch -> palette / RGB565 decode -> HDMI).
//
// NOTE(review): mode registers, maxX/maxY and the status fields X, Y,
// draw_area cross between the two clock domains without synchronizers.
// That is how the design was written; it is left unchanged here.
module FGA(
   input wire         pclk,        // board clock
   input wire         clk,         // system clock
   input wire         sel,         // if zero, writes are ignored
   input wire [3:0]   mem_wmask,   // mem write mask and strobe
   input wire [16:0]  mem_address, // address in graphic memory (128K), word-aligned
   input wire [31:0]  mem_wdata,   // data to be written
   output wire [3:0]  gpdi_dp,     // HDMI signals, blue, green, red, clock
                                   // gpdi_dn generated by pins (see ulx3s.lpf)
   input wire         io_wstrb,
   input wire         io_rstrb,
   input wire         sel_cntl,    // IO: select control register (RW)
   input wire         sel_dat,     // IO: select data input (W)
   output wire [31:0] rdata        // data read
);

`include "GFX_modes.v"

   wire pixel_clk;

   reg [31:0] VRAM[0:32767];   // 32K words = 128KB of video memory
   reg [23:0] PALETTE[0:255];  // colormap, {R,G,B}, 8 bits per component

   /************************* HDMI signal generation ***************************/

   // Video mode parameters
   localparam MODE_1bpp  = 3'd0;
   localparam MODE_2bpp  = 3'd1;
   localparam MODE_4bpp  = 3'd2;
   localparam MODE_8bpp  = 3'd3;
   localparam MODE_16bpp = 3'd4;

   reg [11:0] mode_width;
   reg [11:0] mode_height;
   reg [2:0]  mode_bpp;          // see MODE_xbpp constants
   reg        mode_colormapped;
   reg        mode_magnify;      // asserted for pixel doubling
   reg [23:0] mode_origin_pix_address;
   reg [23:0] mode_wrap_pix_address;

   // This part is just like a VGA generator.
   reg [11:0] X, Y;      // current pixel coordinates
   reg hsync, vsync;     // horizontal and vertical synchronization
   reg draw_area;        // asserted if current pixel is in drawing area
   reg mem_busy;         // asserted if memory transfer is running.

   // Data read from control register
   reg [31:0] read_reg;
   assign rdata = (io_rstrb && sel_cntl) ? read_reg : 32'b0;

   // We are going to fetch data from video RAM (now stored in BRAM), and then,
   // in colormapped modes, fetch colormap entry. Each fetch introduces some
   // latency -> there is a small pixel pipeline. Each stage needs to have
   // its own copy of all registers it needs (that is, copy pixel address
   // between stage 1 and stage 2 to keep it in sync with pixel data).
   //
   // Stage 0 generates the X,Y coordinates and horizontal,vertical sync signals
   //   (standard in all VGA/DVI/HDMI drivers)
   // Stage 1 generates the pixel address. The unit is in number of pixels.
   //   it handles pixel doubling/scanline doubling in 320x200 resolutions
   //   it also handles page flipping, with the ORIGIN register.
   // Stage 2 fetches pixel data from RAM. It handles pixel address -> word address
   //   translation. It creates its own copy of pixel_address to keep it in
   //   sync with pixel data (1 clock latency)
   // Stage 3 generates R,G,B either from colormap lookup (mode_colormapped
   //   set) or from 16 bit pixel data directly (mode_colormapped clear).
   //   The result is registered.
   //
   // Note: the first two pixel columns are wrong due to latency (the image is
   // shifted two pixels to the right, with garbage in the first two columns),
   // normally we should start fetching from the previous scanline, at the end
   // of hsync, in advance by the pipeline latency.
   // I was too lazy to do that, so I just hide the first two columns !
   // (so there are two columns missing on the right side of the image).
   // I will do that properly when VRAM will be stored in SDRAM (then I'll have no
   // choice, latency will probably be significantly larger than 2 pixels).

   // NOTE(review): in the copy of this file that was reviewed, the text
   // between the hsync upper bound and the first stage-2 case item was
   // lost (everything between a '<' and a later '>' had been stripped).
   // The end of stage 0, the whole of stage 1 and the head of the stage-2
   // case statement below are a RECONSTRUCTION based on the stage
   // descriptions above and on the ORIGIN / WRAP / DISPLAYMODE register
   // documentation. Compare against the pristine upstream file before
   // relying on pixel-exact behavior.

   // Stage 0: X,Y,vsync,hsync generation
   always @(posedge pixel_clk) begin
      if(X == GFX_line_width-1) begin
         X <= 0;
         Y <= (Y == GFX_lines-1) ? 0 : Y+1;
      end else begin
         X <= X+1;
      end
      // NOTE(review): GFX_h_sync_width / GFX_v_sync_width are assumed to
      // be declared in GFX_modes.v next to the front-porch parameters.
      hsync <= (X >= GFX_width+GFX_h_front_porch) &&
               (X <  GFX_width+GFX_h_front_porch+GFX_h_sync_width);
      vsync <= (Y >= GFX_height+GFX_v_front_porch) &&
               (Y <  GFX_height+GFX_v_front_porch+GFX_v_sync_width);
      draw_area <= (X < GFX_width) && (Y < GFX_height);
   end

   // Stage 1: pixel address generation (reconstructed, see note above).
   // Addresses are expressed in pixels, not in bytes or words.
   reg [23:0] pix_address;            // address of the pixel being fetched
   reg [23:0] row_start_pix_address;  // address of the first pixel of the row

   // Candidate next addresses, before wrap-around
   wire [23:0] pix_address_plus1 = pix_address + 24'd1;
   wire [23:0] row_address_plus_width =
               row_start_pix_address + {12'b0, mode_width};

   // WRAP register: restart at pixel address 0 when reached
   wire [23:0] next_pix_address =
      (pix_address_plus1 == mode_wrap_pix_address) ? 24'd0
                                                   : pix_address_plus1;
   wire [23:0] next_row_address =
      (row_address_plus_width == mode_wrap_pix_address) ? 24'd0
                                                        : row_address_plus_width;

   always @(posedge pixel_clk) begin
      if(X == 0) begin
         if(Y == 0) begin
            // New frame: restart from ORIGIN (page flipping)
            row_start_pix_address <= mode_origin_pix_address;
            pix_address           <= mode_origin_pix_address;
         end else if(!mode_magnify || !Y[0]) begin
            // New source scanline
            row_start_pix_address <= next_row_address;
            pix_address           <= next_row_address;
         end else begin
            // Scanline doubling: replay the same source scanline
            pix_address <= row_start_pix_address;
         end
      end else if(!mode_magnify || !X[0]) begin
         // Pixel doubling: advance every other pixel clock when magnified
         pix_address <= next_pix_address;
      end
   end

   // Stage 2: pixel data fetch
   // Pixel address -> word address translation. Only the 15 LSBs are used
   // to index VRAM.
   reg [23:0] word_address;
   always @(*) begin
      case(mode_bpp)
        MODE_16bpp: word_address = pix_address >> 1;
        MODE_8bpp:  word_address = pix_address >> 2;
        MODE_4bpp:  word_address = pix_address >> 3;
        MODE_2bpp:  word_address = pix_address >> 4;
        MODE_1bpp:  word_address = pix_address >> 5;
        default:    word_address = 0;
      endcase
   end

   reg [23:0] pix_address_2;   // copy of pix_address, in sync with pixel data
   reg [31:0] pix_word_data_2; // the VRAM word that contains the pixel
   always @(posedge pixel_clk) begin
      pix_address_2   <= pix_address;
      pix_word_data_2 <= VRAM[word_address[14:0]]; // TODO
   end

   // Stage 3: generate R,G,B from pixel data
   // combinatorial circuit to extract index from
   // pixel data.
   reg [7:0] pix_color_index_3;
   /* verilator lint_off WIDTH */
   always @(*) begin
      case(mode_bpp)
        MODE_8bpp: begin
           pix_color_index_3 = pix_word_data_2 >> {pix_address_2[1:0], 3'b0};
        end
        MODE_4bpp: begin
           pix_color_index_3[3:0] = pix_word_data_2 >> {pix_address_2[2:0], 2'b0};
           pix_color_index_3[7:4] = 4'b0;
        end
        MODE_2bpp: begin
           pix_color_index_3[1:0] = pix_word_data_2 >> {pix_address_2[3:0], 1'b0};
           pix_color_index_3[7:2] = 6'b0;
        end
        MODE_1bpp: begin
           pix_color_index_3[0]   = pix_word_data_2 >> pix_address_2[4:0];
           pix_color_index_3[7:1] = 7'b0;
        end
        default: begin
           pix_color_index_3 = 0;
        end
      endcase
   end
   /* verilator lint_on WIDTH */

   // Size of the displayed zone, in physical pixels (doubled when magnified)
   reg [11:0] maxX;
   reg [11:0] maxY;
   always @(posedge clk) begin
      maxX <= mode_magnify ? (mode_width  << 1) : mode_width;
      maxY <= mode_magnify ? (mode_height << 1) : mode_height;
   end

   reg [7:0] R,G,B;
   always @(posedge pixel_clk) begin
      if(mode_colormapped) begin
         {R,G,B} <= PALETTE[pix_color_index_3];
      end else begin
         // Direct color: two RGB565 pixels per 32-bit word,
         // even pixel address in the 16 LSBs.
         if(pix_address_2[0]) begin
            R <= {pix_word_data_2[31:27],3'b000};
            G <= {pix_word_data_2[26:21],2'b00 };
            B <= {pix_word_data_2[20:16],3'b000};
         end else begin
            R <= {pix_word_data_2[15:11],3'b000};
            G <= {pix_word_data_2[10:5 ],2'b00 };
            B <= {pix_word_data_2[ 4:0 ],3'b000};
         end
      end
      // Hide what's outside the display zone.
      // Hide the first two columns (I was too lazy to properly handle my
      // pixel pipeline latency).
      // (this assignment comes last, so it overrides the ones above)
      if(X == 0 || X == 1 || X >= maxX || Y >= maxY) {R,G,B} <= 24'b0;
   end

   // Video signal generation and HDMI
   wire pixel_clk_x5; // The pixel_clk*5 freq clock used by the serializers (DDR)

   // The graphic PLL, that generates the pixel clock (and freq*5 clock)
   GFX_PLL gfx_pll(
      .pclk(pclk),
      .pixel_clk(pixel_clk),
      .pixel_clk_x5(pixel_clk_x5)
   );

   // The HDMI encoder
   GFX_hdmi hdmi(
      .pixel_clk(pixel_clk),
      .pixel_clk_x5(pixel_clk_x5),
      .R(R), .G(G), .B(B),
      .hsync(hsync), .vsync(vsync),
      .draw_area(draw_area),
      .gpdi_dp(gpdi_dp)
   );

   /*************************************************************************/

   // Decoding of the word written to the control port
   wire       is_command = mem_wdata[7];
   wire [2:0] command    = mem_wdata[2:0];
   wire [2:0] set_regid  = mem_wdata[2:0];
   wire[23:0] arg24      = mem_wdata[31:8];
   wire[11:0] arg12_1    = mem_wdata[19:8];
   wire[11:0] arg12_2    = mem_wdata[31:20];

   localparam REG_STATUS      = 3'd0;
   localparam REG_RESOLUTION  = 3'd1;
   localparam REG_COLORMODE   = 3'd2;
   localparam REG_DISPLAYMODE = 3'd3;
   localparam REG_ORIGIN      = 3'd4;
   localparam REG_WRAP        = 3'd5;
   localparam REG_READREGID   = 3'd6;

   localparam CMD_SET_PALETTE_R = 3'd1;
   localparam CMD_SET_PALETTE_G = 3'd2;
   localparam CMD_SET_PALETTE_B = 3'd3;
   localparam CMD_SET_WWINDOW_X = 3'd4;
   localparam CMD_SET_WWINDOW_Y = 3'd5;
   localparam CMD_FILLRECT      = 3'd6;

   // Windowed-pixel write and fillrect command.
   //
   // - write window command, two commands:
   //     (send 32 bits to IO_FGA_CNTL hardware register)
   //     SET_WWINDOW_X: X1 X2
   //     SET_WWINDOW_Y: Y1 Y2
   //   SET_WWINDOW_X needs to be sent first (SET_WWINDOW_Y computes the
   //   start address of the window from X1).
   //
   // - write data: send 16 bits to IO_FGA_DAT hardware register.
   //     In direct-color mode the scan-out decodes them as
   //     RRRRR GGGGGG BBBBB (RGB565, see stage 3).
   //
   // Note that once the window is properly initialized, the write
   // data command emulates the SSD1351 OLED display, then by writing
   // to both FGA and SSD1351 control registers, one clones the output
   // of the SSD1351 oled display to the HDMI screen for free !
   //
   // See the corresponding firmware macros:
   //   #define IO_GFX_DAT (IO_SSD1351_DAT16 | IO_FGA_DAT)
   //   #define OLED_WRITE_DATA_UINT16(RGB) IO_OUT(IO_GFX_DAT,(RGB))
   //   #define OLED_WRITE_DATA_RGB(R,G,B) OLED_WRITE_DATA_UINT16(GL_RGB(R,G,B))
   //
   // This also works when FGA is in paletted mode (320x200x8bpp, 640x400x4bpp)
   // since the write data command properly interprets pixel addresses. The
   // only requirement is to have a palette that will correctly map the 8 LSBs
   // / 4 LSBs of pixel data to a color. In libfemtorv32, this maps 0 to black
   // and any non-zero to white (this is how COMMANDER is displayed in 640x400
   // on the HDMI screen).
   //
   // To generate pixel data, there are two other options:
   //   - directly writing to VRAM from FemtoRV32
   //   - FILLRECT (see below)

   reg [11:0] window_x1, window_x2, window_y1, window_y2, window_x, window_y;
   reg [23:0] window_row_start;      // pixel address of current window row
   reg [23:0] window_pixel_address;  // pixel address of next pixel to write
   reg [15:0] fill_color;
   reg        fill_rect;             // asserted while FILLRECT is running

   // Known state at power-up, so that direct VRAM writes (that require
   // !mem_busy) work before any window command was sent, also in
   // 4-state simulation.
   initial begin
      mem_busy  = 1'b0;
      fill_rect = 1'b0;
   end

   // Asserted on each clock where one pixel is written into the window,
   // either by FILLRECT or by a write to the data port. Requires an armed
   // window (mem_busy). The same strobe drives both the address counter
   // and the VRAM write port so that they cannot get out of step.
   wire window_write = mem_busy && (fill_rect || (io_wstrb && sel_dat));

   // Data read from control register: depends on mapped register (read_regid)
   reg [2:0] read_regid;
   always @(posedge clk) begin
      case(read_regid)
        REG_RESOLUTION:  read_reg <= {8'b0, mode_height, mode_width};
        REG_COLORMODE:   read_reg <= {28'b0, mode_colormapped, mode_bpp};
        REG_DISPLAYMODE: read_reg <= {31'b0, mode_magnify};
        REG_ORIGIN:      read_reg <= {8'b0, mode_origin_pix_address};
        REG_WRAP:        read_reg <= {8'b0, mode_wrap_pix_address};
        REG_READREGID:   read_reg <= {29'b0, read_regid};
        // REG_STATUS (and unmapped ids):
        //   vblank[31] hblank[30] drawarea[29] membusy[28] 0[27:24] X[23:12] Y[11:0]
        // NOTE(review): vblank / hblank thresholds are hard-wired to
        // 400 / 640, whatever the configured resolution.
        default: read_reg <= {(Y >= 400),(X >= 640),draw_area,mem_busy,4'b0,X,Y};
      endcase
   end

   // Control registers, commands and window address counter
   always @(posedge clk) begin

      // Window address counter: advance by one pixel per written pixel,
      // go to next row at the right border, stop at the last pixel.
      if(window_write) begin
         window_pixel_address <= window_pixel_address + 1;
         window_x <= window_x + 1;
         if(window_x == window_x2) begin
            if(window_y == window_y2) begin
               // Last pixel of the window: transfer is finished
               mem_busy  <= 1'b0;
               fill_rect <= 1'b0;
            end else begin
               // Next row (these assignments override the increments above)
               window_y <= window_y+1;
               window_x <= window_x1;
               window_pixel_address <= window_row_start + {12'b0, mode_width};
               window_row_start     <= window_row_start + {12'b0, mode_width};
            end
         end
      end

      // Write to the control port
      if(io_wstrb && sel_cntl) begin
         if(is_command) begin
            case(command)
              CMD_SET_PALETTE_B: PALETTE[arg12_1[7:0]][7:0  ] <= arg12_2[7:0];
              CMD_SET_PALETTE_G: PALETTE[arg12_1[7:0]][15:8 ] <= arg12_2[7:0];
              CMD_SET_PALETTE_R: PALETTE[arg12_1[7:0]][23:16] <= arg12_2[7:0];
              CMD_SET_WWINDOW_X: begin
                 window_x1 <= arg12_1;
                 window_x2 <= arg12_2;
                 window_x  <= arg12_1;
                 mem_busy  <= 1'b1;
              end
              CMD_SET_WWINDOW_Y: begin
                 window_y1 <= arg12_1;
                 window_y2 <= arg12_2;
                 window_y  <= arg12_1;
                 mem_busy  <= 1'b1;
                 // Uses window_x1 as set by a previous SET_WWINDOW_X
                 /* verilator lint_off WIDTH */
                 window_row_start     <= arg12_1 * mode_width + window_x1;
                 window_pixel_address <= arg12_1 * mode_width + window_x1;
                 /* verilator lint_on WIDTH */
              end
              CMD_FILLRECT: begin
                 // Only start a fill if a window is armed. Without this
                 // guard, fill_rect would remain asserted forever (the
                 // address counter above only runs when mem_busy is set),
                 // which would permanently block direct VRAM writes.
                 if(mem_busy) begin
                    fill_rect  <= 1'b1;
                    fill_color <= arg24[15:0];
                 end
              end
              default: begin end
            endcase
         end else begin
            case(set_regid)
              REG_RESOLUTION:  {mode_height, mode_width}    <= arg24;
              REG_COLORMODE:   {mode_colormapped, mode_bpp} <= arg24[3:0];
              REG_DISPLAYMODE: mode_magnify                 <= arg24[0];
              REG_READREGID:   read_regid                   <= arg24[2:0];
              REG_ORIGIN:      mode_origin_pix_address      <= arg24;
              REG_WRAP:        mode_wrap_pix_address        <= arg24;
              default: begin end
            endcase
         end
      end
   end

   // Write to VRAM (FILLRECT and interface with processor)
   wire [14:0] vram_word_address = mem_address[16:2];
   wire [15:0] pixel_color = fill_rect ? fill_color : mem_wdata[15:0];

   // FILLRECT:
   // The fillrect command repeatedly sends the same pixel data to the current
   // window. It has two advantages as compared to do that by hand:
   //   - fills one pixel per clock (whereas in its fastest configuration,
   //     FemtoRV32 uses 6 clocks per loop iteration)
   //   - execution can continue, which lets FemtoRV prepare the next drawing
   //     operation. Before sending more data to FGA, FemtoRV needs to test
   //     the FGA_BUSY_bit in the control register, as follows:
   //       while(IO_IN(IO_FGA_CNTL) & FGA_BUSY_bit);
   // This is used in LIBFEMTORV32/FGA.c, to implement hardware-accelerated
   // polygon fill (using one FILLRECT call per polygon scanline).
   //
   // The explicit per-lane cases below are kept as they are on purpose
   // (one constant part-select per write).
   always @(posedge clk) begin
      // FILLRECT or pixel data sent to the graphic data port
      if(window_write) begin
         /* verilator lint_off CASEINCOMPLETE */
         case(mode_bpp)
           MODE_16bpp: begin
              case(window_pixel_address[0])
                1'b0: VRAM[window_pixel_address[15:1]][15:0 ] <= pixel_color;
                1'b1: VRAM[window_pixel_address[15:1]][31:16] <= pixel_color;
              endcase
           end
           MODE_8bpp: begin
              case(window_pixel_address[1:0])
                2'b00: VRAM[window_pixel_address[16:2]][ 7:0 ] <= pixel_color[7:0];
                2'b01: VRAM[window_pixel_address[16:2]][15:8 ] <= pixel_color[7:0];
                2'b10: VRAM[window_pixel_address[16:2]][23:16] <= pixel_color[7:0];
                2'b11: VRAM[window_pixel_address[16:2]][31:24] <= pixel_color[7:0];
              endcase
           end
           MODE_4bpp: begin
              case(window_pixel_address[2:0])
                3'b000: VRAM[window_pixel_address[17:3]][ 3:0 ] <= pixel_color[3:0];
                3'b001: VRAM[window_pixel_address[17:3]][ 7:4 ] <= pixel_color[3:0];
                3'b010: VRAM[window_pixel_address[17:3]][11:8 ] <= pixel_color[3:0];
                3'b011: VRAM[window_pixel_address[17:3]][15:12] <= pixel_color[3:0];
                3'b100: VRAM[window_pixel_address[17:3]][19:16] <= pixel_color[3:0];
                3'b101: VRAM[window_pixel_address[17:3]][23:20] <= pixel_color[3:0];
                3'b110: VRAM[window_pixel_address[17:3]][27:24] <= pixel_color[3:0];
                3'b111: VRAM[window_pixel_address[17:3]][31:28] <= pixel_color[3:0];
              endcase
           end
           MODE_2bpp: begin
              case(window_pixel_address[3:0])
                4'b0000: VRAM[window_pixel_address[18:4]][ 1:0 ] <= pixel_color[1:0];
                4'b0001: VRAM[window_pixel_address[18:4]][ 3:2 ] <= pixel_color[1:0];
                4'b0010: VRAM[window_pixel_address[18:4]][ 5:4 ] <= pixel_color[1:0];
                4'b0011: VRAM[window_pixel_address[18:4]][ 7:6 ] <= pixel_color[1:0];
                4'b0100: VRAM[window_pixel_address[18:4]][ 9:8 ] <= pixel_color[1:0];
                4'b0101: VRAM[window_pixel_address[18:4]][11:10] <= pixel_color[1:0];
                4'b0110: VRAM[window_pixel_address[18:4]][13:12] <= pixel_color[1:0];
                4'b0111: VRAM[window_pixel_address[18:4]][15:14] <= pixel_color[1:0];
                4'b1000: VRAM[window_pixel_address[18:4]][17:16] <= pixel_color[1:0];
                4'b1001: VRAM[window_pixel_address[18:4]][19:18] <= pixel_color[1:0];
                4'b1010: VRAM[window_pixel_address[18:4]][21:20] <= pixel_color[1:0];
                4'b1011: VRAM[window_pixel_address[18:4]][23:22] <= pixel_color[1:0];
                4'b1100: VRAM[window_pixel_address[18:4]][25:24] <= pixel_color[1:0];
                4'b1101: VRAM[window_pixel_address[18:4]][27:26] <= pixel_color[1:0];
                4'b1110: VRAM[window_pixel_address[18:4]][29:28] <= pixel_color[1:0];
                4'b1111: VRAM[window_pixel_address[18:4]][31:30] <= pixel_color[1:0];
              endcase
           end
           default: begin // 1bpp (also reached for unused mode_bpp values)
              VRAM[window_pixel_address[19:5]][window_pixel_address[4:0]] <= pixel_color[0];
           end
         endcase
         /* verilator lint_on CASEINCOMPLETE */
      end else if(sel && !mem_busy) begin
         // Direct VRAM write from FemtoRV32
         // (ignored while a window transfer is armed, i.e. mem_busy set)
         if(mem_wmask[0]) VRAM[vram_word_address][ 7:0 ] <= mem_wdata[ 7:0 ];
         if(mem_wmask[1]) VRAM[vram_word_address][15:8 ] <= mem_wdata[15:8 ];
         if(mem_wmask[2]) VRAM[vram_word_address][23:16] <= mem_wdata[23:16];
         if(mem_wmask[3]) VRAM[vram_word_address][31:24] <= mem_wdata[31:24];
      end
   end

endmodule