--- Title: timing_gen_ext.vhd
--- Description: This module implements a timing generator with a register interface
--- 			     for the MityDSP.
---
---
---     o  0                          
---     | /       Copyright (c) 2014  
---    (CL)---o   Critical Link, LLC  
---      \                            
---       O                           
---
--- Company: Critical Link, LLC
--- Date: 10/06/2014
--- Version: 
---   1.00 - 8/20/2014 - Initial Version
---

library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
library Unisim;
use Unisim.Vcomponents.all;

-- Timing generator (opcode sequencer) with a 16-bit register interface.
-- The opcode memory itself is external to this entity: port A of that memory
-- is driven from the register interface (i_emif_clk domain) and port B is
-- driven by the sequencer (i_clk domain).
entity timing_gen_ext is
	generic
	(
		-- log2 of the external opcode memory depth; sets the width of o_addra
		-- and o_addrb, and 2**N is reported through registers 0x0D/0x0E.
		BRAM_DEPTH_TWO_TO_N : integer range 9 to 10 := 10
	);                                                 
	port ( 
		-- EMIF Interface signals
		-- i_ABus selects the register, i_DBus carries write data and o_DBus
		-- carries registered read data (forced to zero while i_cs = '0').
		-- A register write happens on a clock edge with i_cs = '1' and
		-- i_wr_en = '1'. i_rd_en is only used to qualify the version-register
		-- read strobe; all other reads are decoded from i_cs and i_ABus alone.
		i_emif_clk : in  std_logic; -- emif_clk (system clock) from top-level
		i_ABus     : in  std_logic_vector(5 downto 0);
		i_DBus     : in  std_logic_vector(15 downto 0);
		o_DBus     : out std_logic_vector(15 downto 0);
		i_wr_en    : in  std_logic;
		i_rd_en    : in  std_logic;
		i_cs       : in  std_logic;
		-- IRQ system signals
		-- o_irq is high while any of the low 16 interrupt status bits is set.
		-- i_ilevel / i_ivector are passed straight to the core_version block.
		o_irq         : out std_logic;
		i_ilevel      : in    std_logic_vector(1 downto 0) := "00";       -- interrupt level (0=4,1=5,2=6,3=7)
		i_ivector     : in    std_logic_vector(3 downto 0) := "0000";     -- interrupt vector (0 through 15)

		-- External 32-bit Opcode Write Ports
		-- Clocked by i_emif_clk. Address and data come from the write address
		-- and write data registers; o_wea is asserted after the low half of the
		-- write data register is written. i_douta is readable through the
		-- read data registers (0x0B/0x0C).
		o_clka  : out std_logic;
		o_addra : out std_logic_vector(BRAM_DEPTH_TWO_TO_N-1 downto 0);
		o_dina  : out std_logic_vector(31 downto 0);
		i_douta : in  std_logic_vector(31 downto 0);
		o_wea   : out std_logic;

		-- External 32-bit Opcode Read Ports
		-- Clocked by i_clk and enabled by i_clk_en. o_addrb is the sequencer
		-- fetch address and i_doutb is the opcode word being decoded.
		o_clkb  : out std_logic;
		o_addrb : out std_logic_vector(BRAM_DEPTH_TWO_TO_N-1 downto 0);
		i_doutb : in  std_logic_vector(31 downto 0);
		o_enb   : out std_logic;

		-- Timing Generator I/O 
		-- i_clk / i_clk_en clock the sequencer. i_wait_inp supplies the inputs
		-- watched by the WAIT_TRANSITION opcode. o_output is the 16-bit payload
		-- (opcode bits 15..0, subject to the payload override registers).
		i_clk      : in std_logic;
		i_clk_en   : in std_logic;
		i_wait_inp : in std_logic_vector(15 downto 0);
		o_output   : out std_logic_vector(15 downto 0)
	);
end timing_gen_ext;

architecture rtl of timing_gen_ext is

--
-- Version information constants
--
-- These values are handed to the core_version instance and are read back by
-- the DSP through register 0x00.
-- The literal 021 is plain decimal 21 (VHDL has no leading-zero octal form).
-- NOTE(review): the file header lists version 1.00 as dated 8/20/2014, while
-- the constants below encode 2014-08-14 - confirm which date is intended.
constant CORE_APPLICATION_ID: std_logic_vector(7 downto 0) := CONV_STD_LOGIC_VECTOR( 021, 8);
constant CORE_VERSION_MAJOR:  std_logic_vector(3 downto 0) := CONV_STD_LOGIC_VECTOR(   1, 4);
constant CORE_VERSION_MINOR:  std_logic_vector(3 downto 0) := CONV_STD_LOGIC_VECTOR(   0, 4);
constant CORE_YEAR:           std_logic_vector(4 downto 0) := CONV_STD_LOGIC_VECTOR(  14, 5);
constant CORE_MONTH:          std_logic_vector(3 downto 0) := CONV_STD_LOGIC_VECTOR(  8, 4);
constant CORE_DAY:            std_logic_vector(4 downto 0) := CONV_STD_LOGIC_VECTOR(  14, 5);

-- Other constants
-- Length of the transition_event shift register used by WAIT_TRANSITION.
constant EVENT_REG_LEN	:	integer := 2;

--
-- All Used components should be declared first
--
-- Version/identification block. Its implementation is not part of this file;
-- here it is only fed the constants above and its o_data is returned on
-- reads of register 0x00.
component core_version is
	port (
		clk           : in std_logic;                       -- system clock
		rd            : in std_logic;                       -- read enable
		ID            : in std_logic_vector(7 downto 0);    -- assigned ID number, 0xF0-0xFF are reserved for customers
		version_major : in std_logic_vector(3 downto 0);    -- major version number 1-15
		version_minor : in std_logic_vector(3 downto 0);    -- minor version number 0-15
		year          : in std_logic_vector(4 downto 0);    -- year since 2000
		month         : in std_logic_vector(3 downto 0);    -- month (1-12)
		day           : in std_logic_vector(4 downto 0);    -- day (1-31)
		ilevel        : in std_logic_vector(1 downto 0) := "00";       -- interrupt level (0=4,1=5,2=6,3=7)
		ivector       : in std_logic_vector(3 downto 0) := "0000";    -- interrupt vector (0 through 15)
		o_data        : out std_logic_vector(15 downto 0)
	);
end component core_version;

--
-- Signal declarations
--
-- Read strobe into core_version and the 16-bit word it returns.
signal ver_rd      : std_logic := '0';
signal version_reg : std_logic_vector(15 downto 0);

-- Signals for inferring Distributed RAM
-- Two small memories are described as arrays: a 16-entry call stack and a
-- 4-entry alternate jump table, each holding D_WIDTH-bit opcode addresses.
constant STACK_MEM_DEPTH: integer:= 16;
constant JMP_MEM_DEPTH  : integer:= 4;
constant D_WIDTH  : integer:=  10;
constant STACK_A_WIDTH : integer:=  4;
constant JMP_A_WIDTH   : integer:=  2;
type stack_mem_type is array (STACK_MEM_DEPTH - 1 downto 0) of
        STD_LOGIC_VECTOR (D_WIDTH - 1 downto 0);
type jmp_mem_type is array (JMP_MEM_DEPTH - 1 downto 0) of
        STD_LOGIC_VECTOR (D_WIDTH - 1 downto 0);
signal stack_mem : stack_mem_type := (others => (others=>'0')) ;
signal jmp_mem : jmp_mem_type := (others => (others=>'0')) ;
signal stack_dout,  jmp_dout  : std_logic_vector(D_WIDTH-1 downto 0) := (others => '0');
signal stack_din,   jmp_din   : std_logic_vector(D_WIDTH-1 downto 0) := (others => '0');
-- stack_we is a level write enable generated by the CALL opcode.
-- jmp_we is a toggle: it flips in the i_emif_clk domain on every write to
-- register 0x0F and is edge-detected in the i_clk domain.
signal stack_we,    jmp_we    : std_logic := '0';
signal stack_waddr, stack_raddr : std_logic_vector(STACK_A_WIDTH-1 downto 0) := (others => '0');
signal jmp_waddr : std_logic_vector(JMP_A_WIDTH-1 downto 0) := (others => '0');
-- Per-entry "alternate jump address valid" flags and their pending-clear flags.
signal jmp_val_reg : std_logic_vector(3 downto 0) := (others => '0');
signal jmp_clr_reg : std_logic_vector(3 downto 0) := (others => '0');
-- jmp_we resampled on i_clk: _meta is the first stage, _r1 /= _r2 marks a toggle.
signal jmp_we_r1, jmp_we_r2, jmp_we_meta : std_logic := '0';
-- Bit 15 of the last word written to register 0x0F (set vs. clear the entry).
signal jmp_valid : std_logic := '0';
-- Jump table entry selected by the opcode currently being decoded.
signal alt_jump_addr : std_logic_vector(1 downto 0) := (others => '0');

-- Registers
-- stat_reg is written and read back over the bus; no other logic in this file
-- reads it.
signal stat_reg      : std_logic_vector(31 downto 0) := (others => '0');   
-- Payload override mask/value (bus domain) and their i_clk-domain copies (_r1).
signal payload_override_en, payload_override_en_r1  : std_logic_vector(15 downto 0) := (others => '0');
signal payload_override_val, payload_override_val_r1  : std_logic_vector(15 downto 0) := (others => '0');
signal int_en_reg    : std_logic_vector(31 downto 0) := (others => '0');
-- NOTE(review): only bits 15..0 of int_stat_reg are ever set (from the payload);
-- bits 31..16 can only be cleared, so they stay at zero.
signal int_stat_reg  : std_logic_vector(31 downto 0) := (others => '0'); 
-- Opcode memory port A address / write data / read data.
signal wr_addr_reg   : std_logic_vector(31 downto 0) := (others => '0');   
signal wr_data_reg   : std_logic_vector(31 downto 0) := (others => '0');   
signal rd_data_reg   : std_logic_vector(31 downto 0) := (others => '0');   

-- wr_addr_inc doubles as the port A write enable; its delayed copy triggers the
-- automatic increment of the write address.
signal wr_addr_inc, wr_addr_inc_r1 : std_logic := '0';
-- Fetch address presented on port B. It is driven by a concurrent assignment,
-- so the initializer ("0100000010" = 258 decimal) only matters at time zero.
signal opcode_addr : std_logic_vector(9 downto 0) := "0100000010";
-- Opcode word returned by port B.
signal opcode_data : std_logic_vector(31 downto 0);
-- payload is the sequencer output; _r1/_r2 are its copies in the i_emif_clk domain.
signal payload, payload_r1, payload_r2 : std_logic_vector(15 downto 0) := (others => '0');

-- Shift register so that the transition mark is longer than a single pulse
signal transition_event : std_logic_vector(EVENT_REG_LEN - 1 downto 0) := (others => '0');
-- Notice from the edge detect process that the transition occurred.
signal transition_detected : std_logic := '0';
-- The last state of the input signal.
signal inp_last : std_logic := '0';
-- Tells the edge detect process to start looking for a transition.
signal detect_transition : std_logic := '0';

-- Intermediate write registers.
-- Each 32-bit register is written as two 16-bit halves: the upper half is
-- parked here and committed together with the lower half.
signal stat_reg_intermed      : std_logic_vector(15 downto 0) := (others => '0');
signal int_en_reg_intermed    : std_logic_vector(15 downto 0) := (others => '0');
signal wr_addr_reg_intermed   : std_logic_vector(15 downto 0) := (others => '0'); 
signal wr_data_reg_intermed   : std_logic_vector(15 downto 0) := (others => '0');

-- OPCODE state machine
-- Decoded from opcode_data(31 downto 28); the literals are listed in opcode
-- number order (JUMP = 0 ... WAIT_TRANSITION = 8).
type OPCODES is (JUMP, REPEAT, SET_A, SET_B, LOOP_A, LOOP_B, CALL, RETURNTO, WAIT_TRANSITION);
-- NOTE(review): the disabled encoding below lists eight codes but the type now
-- has nine literals; it would need a ninth entry before being re-enabled.
--attribute ENUM_ENCODING: STRING;
--attribute ENUM_ENCODING of OPCODES:type is "0000 0001 0010 0011 0100 0101 0110 0111";
signal opcode : OPCODES;

-- Candidate next-fetch addresses; all start at 258 ("0100000010").
signal jump_addr, loop_addra, loop_addrb, call_addr, ret_addr, curr_addr, next_addr : std_logic_vector(9 downto 0) := "0100000010";
-- curr_addr delayed by one enabled clock (used to spot arrival at a new address).
signal curr_addr_r1 : std_logic_vector(9 downto 0);
-- 11-bit repeat / loop counters loaded from opcode bits 26..16.
signal rep_cnt, loop_cnta, loop_cntb : std_logic_vector(10 downto 0) := (others => '0');
-- Instruction phase flag: toggles every enabled clock; branch decisions are
-- taken while it is '1'.
signal clks_per_instr : std_logic := '1';

-- '1' when the previous enabled clock also decoded WAIT_TRANSITION.
signal last_waitt : std_logic := '0';

begin -- architecture: rtl

-- Version/ID block: returns the core identification word for register 0x00.
version : core_version
	port map(
		-- Bus side: clocked by the EMIF clock, strobed by read_regs
		clk           => i_emif_clk,
		rd            => ver_rd,
		o_data        => version_reg,
		-- Interrupt routing information supplied by the parent design
		ilevel        => i_ilevel,
		ivector       => i_ivector,
		-- Identification constants declared in this architecture
		ID            => CORE_APPLICATION_ID,
		version_major => CORE_VERSION_MAJOR,
		version_minor => CORE_VERSION_MINOR,
		-- Build date: year since 2000, month (1-12), day (1-31)
		year          => CORE_YEAR,
		month         => CORE_MONTH,
		day           => CORE_DAY
	);

------------------------------------------------------------------------------
-- Handle read requests from the DSP
------------------------------------------------------------------------------
-- Registered read multiplexer for the DSP bus.
-- o_DBus is updated on every rising edge of i_emif_clk: it presents the
-- register selected by i_ABus while i_cs is asserted and zero otherwise.
-- 32-bit registers appear as two consecutive 16-bit words, upper half first.
read_regs : process (i_emif_clk)
	-- Opcode memory depth in words, reported at addresses 0x0D (upper half)
	-- and 0x0E (lower half).
	constant BRAM_DEPTH_WORDS : std_logic_vector(31 downto 0) :=
		CONV_STD_LOGIC_VECTOR(2**BRAM_DEPTH_TWO_TO_N, 32);
begin
	if i_emif_clk'event and i_emif_clk='1' then

		-- Read strobe for the version block: only a chip-selected read of
		-- address 0 raises it, and it drops again on the next clock.
		ver_rd <= '0';
		if i_cs='1' and i_rd_en='1' and i_ABus="000000" then
			ver_rd <= '1';
		end if;

		-- Default bus value; replaced below when a mapped register is selected.
		o_DBus <= (others=>'0');

		if i_cs /= '0' then
			case i_ABus is
				when "000000" => o_DBus <= version_reg;                      -- 0x00 version / ID
				when "000001" => o_DBus <= stat_reg(31 downto 16);           -- 0x01 status (hi)
				when "000010" => o_DBus <= stat_reg(15 downto 0);            -- 0x02 status (lo)
				when "000011" => o_DBus <= payload_override_en;              -- 0x03 override mask
				when "000100" => o_DBus <= payload_override_val;             -- 0x04 override value
				when "000101" => o_DBus <= int_en_reg(31 downto 16);         -- 0x05 int enable (hi)
				when "000110" => o_DBus <= int_en_reg(15 downto 0);          -- 0x06 int enable (lo)
				when "000111" => o_DBus <= int_stat_reg(31 downto 16);       -- 0x07 int status (hi)
				when "001000" => o_DBus <= int_stat_reg(15 downto 0);        -- 0x08 int status (lo)
				when "001001" => o_DBus <= wr_addr_reg(31 downto 16);        -- 0x09 write address (hi)
				when "001010" => o_DBus <= wr_addr_reg(15 downto 0);         -- 0x0A write address (lo)
				when "001011" => o_DBus <= rd_data_reg(31 downto 16);        -- 0x0B read data (hi)
				when "001100" => o_DBus <= rd_data_reg(15 downto 0);         -- 0x0C read data (lo)
				when "001101" => o_DBus <= BRAM_DEPTH_WORDS(31 downto 16);   -- 0x0D BRAM depth (hi)
				when "001110" => o_DBus <= BRAM_DEPTH_WORDS(15 downto 0);    -- 0x0E BRAM depth (lo)
				when others   => null;                                       -- unmapped: keep zero
			end case;
		end if;

	end if;
end process read_regs;

------------------------------------------------------------------------------
-- Handle register write requests from the DSP
------------------------------------------------------------------------------
-- Register write decoder for the DSP bus (i_emif_clk domain).
--
-- A write is taken on every rising edge where i_cs = '1' and i_wr_en = '1'.
-- 32-bit registers are written upper half first: the upper half is parked in
-- an *_intermed register and committed together with the lower half.
--
--   0x01/0x02  status register (hi/lo)
--   0x03       payload override mask
--   0x04       payload override value
--   0x05/0x06  interrupt enable (hi/lo)
--   0x07/0x08  interrupt status, write '1' to clear (hi/lo)
--   0x09/0x0A  opcode memory write address (hi/lo)
--   0x0B/0x0C  opcode memory write data (hi/lo); writing 0x0C triggers the
--              memory write and a following address auto-increment
--   0x0F       alternate jump table entry: bit 15 = valid, bits 13..12 =
--              entry index, low BRAM_DEPTH_TWO_TO_N bits = target address
--
-- On clocks without a bus write the process refreshes the low interrupt status
-- bits from the synchronized payload and completes a pending auto-increment.
write_regs : process(i_emif_clk)
begin
	if i_emif_clk'event and i_emif_clk='1' then
		-- Delayed write strobe: the address increments only after the memory
		-- write has been issued.
		wr_addr_inc_r1 <= wr_addr_inc;

		-- Bring the sequencer payload (i_clk domain) into this clock domain
		-- through two register stages.
		payload_r1 <= payload;
		payload_r2 <= payload_r1;

		if i_cs = '1' and i_wr_en = '1' then
			case i_ABus is
				when "000001" =>
					stat_reg_intermed <= i_DBus;
				when "000010" =>
					stat_reg <= stat_reg_intermed & i_DBus;
				when "000011" =>
					payload_override_en <= i_DBus;
				when "000100" =>
					payload_override_val <= i_DBus;
				when "000101" =>
					int_en_reg_intermed <= i_DBus;
				when "000110" =>
					int_en_reg <= int_en_reg_intermed & i_DBus;
				when "000111" =>
					-- write-one-to-clear
					int_stat_reg(31 downto 16) <= int_stat_reg(31 downto 16) and NOT i_DBus(15 downto 0);
				when "001000" =>
					-- write-one-to-clear
					int_stat_reg(15 downto 0) <= int_stat_reg(15 downto 0) and NOT i_DBus(15 downto 0);
				when "001001" =>
					wr_addr_reg_intermed <= i_DBus;
				when "001010" =>
					wr_addr_reg <= wr_addr_reg_intermed & i_DBus;
				when "001011" =>
					wr_data_reg_intermed <= i_DBus;
				when "001100" =>
					wr_data_reg <= wr_data_reg_intermed & i_DBus;
					-- wr_addr_inc is also the opcode memory write enable
					wr_addr_inc  <= '1';
				when "001111" =>
					jmp_valid <= i_DBus(15);
					jmp_waddr <= i_DBus(13 downto 12);
					-- jmp_din is always D_WIDTH (10) bits wide while the address
					-- field is BRAM_DEPTH_TWO_TO_N (9 or 10) bits wide. Clear
					-- the whole word first and then fill in the address field,
					-- so the assignment lengths match for both generic values
					-- and unused upper bits are zero.
					jmp_din   <= (others => '0');
					jmp_din(BRAM_DEPTH_TWO_TO_N-1 downto 0) <= i_DBus(BRAM_DEPTH_TWO_TO_N-1 downto 0);
					-- Toggle (not pulse) so the event can be detected in the
					-- i_clk domain regardless of the clock ratio.
					jmp_we    <= NOT(jmp_we);
				when others => null;
			end case;
		else
			-- NOTE(review): this overwrites the low status bits on every
			-- non-write clock, so they follow (payload and enable) rather than
			-- latching, and a write-one-to-clear lasts a single clock. Confirm
			-- this level-sensitive behaviour is what the driver expects.
			int_stat_reg(15 downto 0) <= payload_r2 and int_en_reg(15 downto 0);
			wr_addr_inc <= '0';
			if wr_addr_inc_r1 = '1' then
				wr_addr_reg(BRAM_DEPTH_TWO_TO_N-1 downto 0) <= wr_addr_reg(BRAM_DEPTH_TWO_TO_N-1 downto 0) + '1';     -- auto increment address
			end if;
		end if;
	end if;
end process write_regs;

-- The sequencer payload drives the output pins directly.
o_output <= payload;

-- Interrupt request: asserted while any of the low 16 interrupt status bits
-- is set (the upper 16 status bits do not contribute).
o_irq <= '0' when int_stat_reg(15 downto 0) = x"0000" else
         '1';

-- Opcode decode: the top nibble of the fetched word selects the instruction.
-- Unassigned codes (x"9" .. x"F") are treated as JUMP.
with opcode_data(31 downto 28) select
	opcode <= JUMP            when x"0",
	          REPEAT          when x"1",
	          SET_A           when x"2",
	          SET_B           when x"3",
	          LOOP_A          when x"4",
	          LOOP_B          when x"5",
	          CALL            when x"6",
	          RETURNTO        when x"7",
	          WAIT_TRANSITION when x"8",
	          JUMP            when others;

-- Next fetch address for the opcode memory.
-- The address may only change while clks_per_instr = '1'; during the other
-- phase (and for any opcode whose exit condition is not yet met) the current
-- address is held.
opcode_addr <=
	-- hold phase
	curr_addr  when clks_per_instr /= '1' else
	-- unconditional jump (direct or alternate target, see jump_addr)
	jump_addr  when opcode = JUMP else
	-- fall through to the following instruction
	next_addr  when opcode = SET_A or
	                opcode = SET_B or
	                (opcode = REPEAT          and rep_cnt   = "00000000000") or
	                (opcode = LOOP_A          and loop_cnta = "00000000000") or
	                (opcode = LOOP_B          and loop_cntb = "00000000000") or
	                (opcode = WAIT_TRANSITION and transition_event(0) = '1') else
	-- loop counters still running: branch back to the loop target
	loop_addra when opcode = LOOP_A and loop_cnta /= "00000000000" else
	loop_addrb when opcode = LOOP_B and loop_cntb /= "00000000000" else
	-- subroutine entry / exit
	call_addr  when opcode = CALL else
	ret_addr   when opcode = RETURNTO else
	-- REPEAT still counting, or WAIT_TRANSITION still waiting
	curr_addr;

------------------------------------------------------------------------------
-- Edge detection process
------------------------------------------------------------------------------
-- Edge detector for the WAIT_TRANSITION opcode.
--
-- Opcode fields used here:
--   opcode_data(20 downto 17)  index (0..15) of the i_wait_inp bit to watch
--   opcode_data(16)            '1' = wait for a rising edge, '0' = falling edge
--
-- While detect_transition is '1' the process compares the selected input with
-- its value on the previous enabled clock and sets transition_detected when
-- the requested edge is seen. transition_detected stays set until
-- detect_transition is dropped.
--
-- The selected input is sampled once per clock and that same sample is used
-- for both the edge comparison and the history register. The previous code
-- indexed the comparison with only three select bits (19 downto 17) while the
-- history used four (20 downto 17), so for inputs 8..15 the "current" and
-- "previous" samples came from different pins.
edge_detect : process (i_clk)
	-- Level of the selected wait input on this clock edge.
	variable v_inp_now : std_logic;
begin
	if i_clk'event and i_clk='1' then
		if i_clk_en = '1' then
			v_inp_now := i_wait_inp(CONV_INTEGER(opcode_data(20 downto 17)));

			-- Always be latching the last input
			inp_last <= v_inp_now;

			if detect_transition = '1' then
				-- If we are in a detection mode.
				if opcode_data(16) = '1' and inp_last = '0' and v_inp_now = '1' then
					-- Looking for rising edge and found rising edge:
					transition_detected <= '1';
				elsif opcode_data(16) = '0' and inp_last = '1' and v_inp_now = '0' then
					-- Looking for falling edge and found falling edge:
					transition_detected <= '1';
				end if;
			else
				-- Reset the transition detected signal when not looking for a transition.
				transition_detected <= '0';
			end if;
		end if;
	end if;
end process edge_detect;

------------------------------------------------------------------------------
-- Decode Opcode Process
------------------------------------------------------------------------------
-- Sequencer core: executes the opcode currently present on opcode_data.
--
-- Runs on i_clk, gated by i_clk_en. clks_per_instr toggles on every enabled
-- clock; the fetch address (opcode_addr) is only allowed to change while it
-- is '1', so the counters and pointers below are also stepped in that phase.
--
-- Opcode word fields used in this process:
--   31..28  opcode number
--   27      '1' = hold the previous payload, '0' = load payload from 15..0
--   26..16  count for REPEAT / SET_A / SET_B
--   25..16  target address for LOOP_A / LOOP_B / CALL
--   15..0   payload
decode_opcode : process (i_clk)
begin
   if i_clk'event and i_clk='1' then
      if i_clk_en = '1' then
         -- Advance the instruction phase.
         clks_per_instr <= NOT clks_per_instr;
         -- Track the address being fetched, the one before it, and the
         -- sequential successor used by fall-through opcodes.
         curr_addr  <= opcode_addr;
         curr_addr_r1 <= curr_addr;
         next_addr  <= opcode_addr + '1'; 
         -- Default: no call-stack write this clock (CALL overrides this below).
         stack_we <= '0';
         -- Local copies of the override registers, which are written in the
         -- i_emif_clk domain.
         payload_override_en_r1 <= payload_override_en;
         payload_override_val_r1 <= payload_override_val;
		 
		 -- Default: previous opcode was not WAIT_TRANSITION (overridden below).
		 last_waitt <= '0';

         for i in 0 to 15 loop
         -- override any payload bits based on override mask
            if payload_override_en_r1(i)='1' then
                payload(i) <= payload_override_val_r1(i);
         -- when bit 27 is set to 1 for any opcodes, the payload output keeps repeating previous results
            elsif opcode_data(27)='0' then
                payload(i) <= opcode_data(i);
            end if;
         end loop;

		 -- Shift a 0 into the top of the transition event register so that a
		 -- signalled event expires after EVENT_REG_LEN enabled clocks
		 -- (WAIT_TRANSITION reloads it with ones below).
		 transition_event <= '0' & transition_event(EVENT_REG_LEN - 1 downto 1);
         case opcode_data(31 downto 28) is
            -- JUMP
            -- Nothing to update here; the target is selected by opcode_addr.
            when x"0" => 

            -- REPEAT
            -- Load the repeat count on arrival at this instruction (the fetch
            -- address just changed), then count down to zero once per
            -- instruction period.
            when x"1" => if curr_addr /= curr_addr_r1 then
                            rep_cnt <= opcode_data(26 downto 16);
                         elsif clks_per_instr = '1' then
                            if rep_cnt /= "00000000000" then
                               rep_cnt <= rep_cnt - '1';
                            end if;
                         end if;

            -- SET-A
            -- Load loop counter A with (count - 1).
            -- NOTE(review): a count field of 0 wraps to all ones in this
            -- 11-bit subtraction - confirm whether 0 is a legal count.
            when x"2" => loop_cnta  <= opcode_data(26 downto 16) - '1';
            
            -- SET-B
            -- Load loop counter B with (count - 1); same wrap note as SET-A.
            when x"3" => loop_cntb  <= opcode_data(26 downto 16) - '1';

            -- LOOP-A
            -- Latch the loop target and step counter A down until it is zero.
            when x"4" => loop_addra <= opcode_data(25 downto 16);
                         if clks_per_instr = '1' then
                            if loop_cnta /= "00000000000" then
                               loop_cnta <= loop_cnta - '1';
                            end if;   
                         end if;

            -- LOOP-B
            -- Latch the loop target and step counter B down until it is zero.
            when x"5" => loop_addrb <= opcode_data(25 downto 16);
                         if clks_per_instr = '1' then
                            if loop_cntb /= "00000000000" then
                               loop_cntb <= loop_cntb - '1';
                            end if;   
                         end if;
                         
            -- CALL
            -- First phase (clks_per_instr = '0'): stage the return address
            -- (address after this CALL) and request a stack write.
            -- Second phase: advance the write pointer and point the read
            -- pointer at the entry just pushed.
            -- There is no overflow check; the 4-bit pointers simply wrap.
            when x"6" => call_addr  <= opcode_data(25 downto 16);
                         
                         if clks_per_instr = '1' then
                            stack_waddr <= stack_waddr + '1';
                            stack_raddr <= stack_waddr;
                            stack_we  <= '0';
                         else
                            stack_din <= opcode_addr + '1';
                            stack_we  <= '1';
                         end if;

            --RETURN
            -- Pop: step both stack pointers back by one (no underflow check).
            when x"7" => if clks_per_instr = '1' then
                            stack_waddr <= stack_waddr - x"1";
                            stack_raddr <= stack_raddr - x"1";
                         end if;
						 
			--WAIT_TRANSITION
			-- First clock on this opcode: arm the edge detector. On later
			-- clocks, once the detector reports the edge, load the event
			-- register with ones (which lets opcode_addr advance) and disarm.
            when x"8" => if last_waitt = '0' then
							detect_transition <= '1';	-- Start the detection
						 elsif transition_detected = '1' then	-- When a transition is found
							transition_event <= (others => '1');	-- Trigger the shift register notifying to go to the next address.
							detect_transition <= '0';
						 end if;
						 
						 last_waitt <= '1';

				-- Opcode numbers 9..15 have no register side effects here.
				when others => null;
         end case;
         
      end if;
   end if;
end process decode_opcode;

-- Alternate jump table index.
-- A JUMP opcode can reference only entry 0 or entry 1 (chosen by opcode
-- bit 26); every other opcode parks the index on entry 3.
alt_jump_addr <= "11" when opcode_data(31 downto 28) /= x"0" else
                 '0' & opcode_data(26);

-- JUMP target: the address embedded in the opcode (bits 25..16) is used when
-- the opcode is a JUMP and the selected table entry is not flagged valid;
-- otherwise the jump table output is used.
jump_addr <= jmp_dout when opcode_data(31 downto 28) /= x"0"
                        or jmp_val_reg(CONV_INTEGER(alt_jump_addr)) /= '0' else
             opcode_data(25 downto 16);

------------------------------------------------------------------------------
-- Jump Valid Register Set/Clear Process
------------------------------------------------------------------------------
-- Maintains the per-entry "alternate jump valid" flags (jmp_val_reg) and the
-- matching pending-clear flags (jmp_clr_reg) in the i_clk domain.
--
-- A bus write to the jump register toggles jmp_we; that toggle is resampled
-- here through three registers and a difference between the last two stages
-- marks one new write. The write either sets or clears the addressed entry's
-- valid flag according to jmp_valid. A valid flag is also cleared once its
-- pending-clear flag is set.
--
-- NOTE(review): the pending-clear flag is raised for every valid entry
-- whenever the current opcode is JUMP, regardless of which entry that JUMP
-- selected - confirm this is intended.
jmp_set_clear : process(i_clk)
	variable v_new_write : boolean;  -- a jump-register write was just seen
	variable v_addressed : boolean;  -- ...and it targets the entry in hand
begin
	if i_clk'event and i_clk='1' then
		if i_clk_en = '1' then
			-- Resample the write toggle from the bus clock domain.
			jmp_we_meta <= jmp_we;
			jmp_we_r1   <= jmp_we_meta;
			jmp_we_r2   <= jmp_we_r1;

			v_new_write := (jmp_we_r1 /= jmp_we_r2);

			for slot in 0 to 3 loop
				v_addressed := v_new_write and (jmp_waddr = CONV_STD_LOGIC_VECTOR(slot,2));

				-- Valid flag: bus set / bus clear take priority over the
				-- automatic clear.
				if v_addressed and jmp_valid = '1' then
					jmp_val_reg(slot) <= '1';
				elsif v_addressed and jmp_valid = '0' then
					jmp_val_reg(slot) <= '0';
				elsif jmp_clr_reg(slot) = '1' then
					jmp_val_reg(slot) <= '0';
				end if;

				-- Pending-clear flag: raised while a JUMP is decoded with the
				-- entry valid, dropped once the valid flag has gone low.
				if jmp_val_reg(slot) = '1' and opcode = JUMP then
					jmp_clr_reg(slot) <= '1';
				elsif jmp_val_reg(slot) = '0' then
					jmp_clr_reg(slot) <= '0';
				end if;
			end loop;

		end if;
	end if;
end process jmp_set_clear;
                  

------------------------------------------------------------------------------
-- Call stack memory (16 entries of D_WIDTH bits)
--
-- Writes are synchronous to i_clk (gated by i_clk_en and stack_we); reads are
-- asynchronous. Because of the asynchronous read this description is not
-- expected to map onto a block RAM; per the original author's note, a memory
-- this small is implemented in LUTs either way.
------------------------------------------------------------------------------
stack_push : process (i_clk) 
begin
	if i_clk'event and i_clk='1' then
		-- Push: store the staged return address at the write pointer.
		if i_clk_en = '1' and stack_we = '1' then
			stack_mem(conv_integer(stack_waddr)) <= stack_din;
		end if;
	end if;
end process stack_push;

-- Asynchronous read at the read pointer; the result is the RETURN target.
stack_dout <= stack_mem(conv_integer(stack_raddr));
ret_addr   <= stack_dout;
      
------------------------------------------------------------------------------
-- Alternate jump table memory (4 entries of D_WIDTH bits)
--
-- Writes are synchronous to i_clk and happen when a toggle of the resampled
-- jmp_we is seen; reads are asynchronous. Because of the asynchronous read
-- this description is not expected to map onto a block RAM; per the original
-- author's note, a memory this small is implemented in LUTs either way.
------------------------------------------------------------------------------
jump_bram_wr : process (i_clk) 
begin
	if i_clk'event and i_clk='1' then
		-- Store the address staged by the bus write at the addressed entry.
		if i_clk_en = '1' and jmp_we_r1 /= jmp_we_r2 then
			jmp_mem(conv_integer(jmp_waddr)) <= jmp_din;
		end if;
	end if;
end process jump_bram_wr;

-- Asynchronous read of the entry selected by the opcode being decoded.
jmp_dout <= jmp_mem(conv_integer(alt_jump_addr));

-- Wiring to the external opcode memory.
--
-- The address ports are BRAM_DEPTH_TWO_TO_N bits wide while the internal
-- address vectors are fixed at 10 (or more) bits, so both addresses are sliced
-- to the port width. With the default depth of 2**10 this is the full
-- internal address; with 2**9 the top address bit is dropped, and without the
-- slice the assignment lengths would not match.

-- Port A: DSP access path, i_emif_clk domain.
o_clka      <= i_emif_clk;
o_addra     <= wr_addr_reg(BRAM_DEPTH_TWO_TO_N-1 downto 0);
o_dina      <= wr_data_reg;
o_wea       <= wr_addr_inc;   -- one write per store to the low write data register
rd_data_reg <= i_douta;

-- Port B: sequencer fetch path, i_clk domain.
o_clkb      <= i_clk;
o_enb       <= i_clk_en;
o_addrb     <= opcode_addr(BRAM_DEPTH_TWO_TO_N-1 downto 0);
opcode_data <= i_doutb;

end rtl;