diff --git a/bsg_misc/bsg_popcount_tdm.py b/bsg_misc/bsg_popcount_tdm.py new file mode 100644 index 000000000..0f7441d3c --- /dev/null +++ b/bsg_misc/bsg_popcount_tdm.py @@ -0,0 +1,354 @@ +#!/usr/bin/env python3 +import math +import sys + +""" +Popcount TDM Generator + +Katharine Lundblad 04/13/2023 + +Popcount takes a binary value and counts the number of ones for a specified width. +This version attempts to use a more optimized adder tree in order to add partial sums +and balance delays using the Three Dimensional Method (TDM). In order to run, open +the command line and include the .py file, the number of widths separated by spaces and +the .v file you would like to be created + +ie. 'python .\bsg_popcount_tdm.py 1 2 4 8 bsg_popcount_tdm.py' +""" + +fid = None +twire_ctr = 0 + + +############################################################ +# BIT +# +class BIT: + + def __init__(self, name, delay): + self.name = name + self.delay = delay + + def __str__(self): + return f'{self.name}' + + def __repr__(self): + return f'{self.name}' + + + +############################################################ +# WIRE initializes an array logic for a wire +# +def WIRE( name=None, dim=(1,), inst=True, delay=0.0 ): + global twire_ctr # wire counter + if not name: + name = f't{twire_ctr}_n' # creates wire variable in verilog + twire_ctr += 1 + + if inst: + fid.write(f' wire ') + for d in dim: + fid.write(f'[{d-1}:0]') # initializes size of logic inverilog + fid.write(f' {name};\n') + + if len(dim) == 1: + return [BIT(f'{name}[{i}]', delay) for i in range(dim[0])] # returns bit object for single bit + else: + return [WIRE(f'{name}[{i}]', dim[1:], False, delay) for i in range(dim[0])] + + + +############################################################ +# AND2 - A two input AND gate, assigns AND of x and y to z +# and calculates the output delay using gate delay +# and input delays +# +def AND2( x, y, z=None ): + z = WIRE()[0] if not z else z # initializes logic for z if not fed to input + fid.write(f' assign {z} = {x} & {y};\n') # writes assign AND gate to z to verilog file + z.delay = max(x.delay, y.delay) + 0.5 # calculates delay of max of 2 inputs + 0.5 AND delay + return z + + + +############################################################ +# OR3 - 3 input OR gate, assigns the OR of w, x, and y to +# z, calculates output delay +# +def OR3( w, x, y, z=None ): + z = WIRE()[0] if not z else z + fid.write(f' assign {z} = {w} | {x} | {y};\n') + z.delay = max(w.delay, x.delay, y.delay) + 0.5 # OR3 gate delay is 0.5 + return z + + +############################################################ +# XOR2 - 2 input XOR gate, assigns the XOR of x and y and +# assigns to z, calculates output delay +# +def XOR2( x, y, z=None ): + z = WIRE()[0] if not z else z + fid.write(f' assign {z} = {x} ^ {y};\n') + z.delay = max(x.delay, y.delay) + 1 # XOR2 gate delay is 1 + return z + + + +############################################################ +# ASSIGN - assign statement, assigns x to z logic, sets z +# delay to the x delay +# +def ASSIGN( x, z=None ): + z = WIRE()[0] if not z else z + fid.write(f' assign {z} = {x};\n') + z.delay = x.delay + 0.0 + return z + + + +############################################################ +# TIELO - tie logic low, assigns z to 1'b0 +# +def TIELO( z=None ): + z = WIRE()[0] if not z else z + fid.write(f' assign {z} = 1\'b0;\n') + return z + + + +############################################################ +# TIEHI - tie logic high, assigns z to 1'b1 +# +def TIEHI( z=None ): + z = WIRE()[0] if not z else z + fid.write(f' assign {z} = 1\'b1;\n') + return z + + + +############################################################ +# HA (Half Adder) - takes the sum of x and y, returns sum +# and carry out +# +def HA( x, y, s=None, c=None ): + s = WIRE()[0] if not s else s + c = WIRE()[0] if not c else c + return ( XOR2(x, y, s), AND2(x, y, c) ) + + + +############################################################ +# FA (Full Adder) - takes sum of x and y with z as carry-in +# returns the sum and carryout +# +# Note: z is faster to s than x or y! +# +def FA( x, y, z, s=None, c=None ): + s = WIRE()[0] if not s else s + c = WIRE()[0] if not c else c + # slower logic (x, y) calculated in an earlier XOR gate in order to balance the delays of x, y and z + return ( XOR2(XOR2(x, y), z, s), OR3(AND2(x, y), AND2(x, z), AND2(y, z), c) ) + + + +############################################################ +# TDM - Three Dimensional Method for adding the 1's of a +# binary value, takes a list of lists and returns a +# sum (s), carry (c), and the final sum (s + c) +# +def TDM( columns, s=None, c=None, sum=None): + + s = WIRE(dim=(len(columns),)) if not s else s # create empty array if sum not passed in + c = WIRE(dim=(len(columns),)) if not c else c + sum = WIRE(dim=(len(columns),)) if not sum else sum + + for i,col in enumerate(columns): + inputs = col + + foobar = WIRE(f'COLUMN{i}',dim=(len(inputs),)) + for qi,q in enumerate(inputs): + ASSIGN(q, foobar[qi]) + + + if len(inputs) == 0: # sum and carry are zero if input is width 0 + TIELO( s[i] ) + TIELO( c[i] ) + + while (len(inputs) > 0): + inputs = sorted(inputs, key=lambda y: y.delay) # sorting input bits by delay + + if len(inputs) == 1: # sum is input if input is width 1, no carry + ASSIGN( inputs.pop(0), s[i] ) + TIELO( c[i] ) + + elif len(inputs) == 2: # sum tied to bit 0 and carry tied to bit 1 + ASSIGN( inputs.pop(0), s[i] ) + ASSIGN( inputs.pop(0), c[i] ) + + elif len(inputs) == 3: # half adder adds bits 1 and 0 and assigns to sum + (_,carry) = HA( inputs.pop(0), inputs.pop(0), s[i], None ) + ASSIGN( inputs.pop(0), c[i] ) # bit 2 assigned to carry + columns[i+1].append(carry) # result from half adder is appended to next radix + + elif len(inputs) == 4: # full adder assigns bits 2 to 0 to inputs and carry-in + (_,carry) = FA( inputs.pop(0), inputs.pop(0), inputs.pop(0), s[i], None ) + ASSIGN( inputs.pop(0), c[i] ) # bit 3 is assigned to carry + columns[i+1].append(carry) # result from full adder is appended to next radix + + else: + (t,carry) = FA(inputs.pop(0), inputs.pop(0), inputs.pop(0)) + inputs.append(t) # for widths > 4, sum is added back to input array + columns[i+1].append(carry) + + sum = s + c + return sum + + + +############################################################ +# Print Begin Module - initializes logic for I/O and +# creates verilog file, writes logic +# variables for input and output +# +def print_begin_module(): + global fid + fid = open(f'bsg_popcount_tdm.v', 'w') + fid.write(f'// Popcount TDM Generator\n') + fid.write(f'// Automatically generated using bsg_popcount_tdm.py\n') + fid.write(f'// DO NOT MODIFY\n') + fid.write(f'// This generator can create a popcount adder tree based on\n') + fid.write(f'// how many widths were specified in the main .py file\n') + fid.write(f'\n') + fid.write(f'module bsg_popcount_tdm #(parameter `BSG_INV_PARAM(width_p=4))\n') # initializes module + fid.write(f' (input [width_p-1:0] i\n') # defines input and output logic + fid.write(f' , output [$clog2(width_p+1)-1:0] o\n') # generates count size based on input width + fid.write(f' );\n' ) + fid.write(f'\n') + + + +############################################################ +# Print Generate - initializes the list of lists or +# columns to do the adding, calls TDM to +# create the adder trees for any number of +# widths, creates if statments in verilog +# and stores the final sum +# +def print_generate(width_p_arr): + # initialize variables for the TDM adder for the first width + num_columns_arr = [math.ceil(math.log2(i+1)) for i in width_p_arr] # populates array with number of columns for each corresponding width + columns = [[] for i in range(num_columns_arr[0])] # create empty columns + bits_lp = WIRE("i", dim=(width_p_arr[0],), inst=False) # initialize array for input bits + columns[0] = [bits_lp[i] for i in range(width_p_arr[0])] # set elements of input bits to first column + s_o = WIRE('s_o', dim=(num_columns_arr[0],), inst=False) # initialize the sum list + c_o = WIRE('c_o', dim=(num_columns_arr[0],), inst=False) # initialize the carry list + sum_o = WIRE('sum_o', dim=(num_columns_arr[0],), inst=False) # initialize the final sum list (sum + carry) + + # if statement to check if the first width matches input and calls TDM + fid.write(f' if (width_p == {width_p_arr[0]}) begin: width_{width_p_arr[0]} \n') + fid.write(f' wire [$clog2(width_p+1)-1:0] s_o;\n') + fid.write(f' wire [$clog2(width_p+1)-1:0] c_o;\n') + TDM(columns, s_o, c_o, sum_o) + fid.write(f' assign o = s_o + c_o;\n') # adds the final sum and carry values + print_max_delay(s_o, c_o) + fid.write(f' end\n') + fid.write(f'\n') + + # generates an else if statement for the rest of the widths and calls TDM + if (len(width_p_arr) > 1): + for j in range(1, len(width_p_arr)): + s_o = WIRE('s_o', dim=(num_columns_arr[j],), inst=False) + c_o = WIRE('c_o', dim=(num_columns_arr[j],), inst=False) + + sum_o = WIRE('sum_o', dim=(num_columns_arr[j],), inst=False) + fid.write(f' else if (width_p == {width_p_arr[j]}) begin: width_{width_p_arr[0]} \n') + fid.write(f' wire [$clog2(width_p+1)-1:0] s_o;\n') + fid.write(f' wire [$clog2(width_p+1)-1:0] c_o;\n') + columns = [[] for i in range(num_columns_arr[j])] + bits_lp = WIRE("i", dim=(width_p_arr[j],), inst=False) + columns[0] = [bits_lp[i] for i in range(width_p_arr[j])] + TDM(columns, s_o, c_o, sum_o) + fid.write(f' assign o = s_o + c_o;\n') # adds the final sum and carry values + print_max_delay(s_o, c_o) + fid.write(f' end\n') + fid.write(f'\n') + fid.write(f'\n') + + + +############################################################ +# Print End Module - gets the max delay of a single TDM +# tree +def print_max_delay(s_o, c_o): + max_s = 0 + for s in s_o: + max_s = max(max_s, s.delay) # calculates maximum delay (critical path) of sum + fid.write(f' // {s} --> {s.delay}\n') # adds delay to comments in verilog + max_c = 0 + for c in c_o: + max_c = max(max_c, c.delay) + fid.write(f' // {c} --> {c.delay}\n') + fid.write(f'\n') + fid.write(f' // max S --> {max_s}\n') + fid.write(f' // max C --> {max_c}\n') + fid.write(f'\n') + + + +############################################################ +# Print End Module - ends the verilog module, closes file +# +def print_end_module(): + fid.write(f'endmodule // bsg_popcount_tdm\n') + fid.close() + + + +############################################################ +# Generate popcount -- Uses TDM to count the number of ones +# for a given width +# +def popcount_gen(width_p_arr): + print_begin_module() # creates and writes "popcount_tdm.v" + print_generate(width_p_arr) + print_end_module() + + + +############################################################ +# Generate popcount -- Check if a string can be converted +# to an integer +# +def is_int(s): + try: + int(s) + return True + except ValueError: + return False + + + +############################################################ +# Get Widths -- Accepts a command line argument like +# ".\bsg_popcount_tdm.py 1 2 4 8 file.v" +# and creates output file and array of widths +# +def get_widths(): + args = sys.argv[1:] # Get command line arguments excluding the script name + width_p_arr = [] # create empty list to store the integers + file_name = None # create a variable to store the file name + + for arg in args: # Loop through the command line arguments + if is_int(arg): # Check if the argument is an integer + width_p_arr.append(int(arg)) # add integer width to list + else: + file_name = arg # if the argument is not a width, it is a filename + + return width_p_arr + + + +if __name__ == '__main__': + width_p_arr = get_widths() # default array before command line is implemented + popcount_gen(width_p_arr) # generates a "popcount_tdm.v" file for width of 8 diff --git a/bsg_misc/bsg_popcount_tdm.v b/bsg_misc/bsg_popcount_tdm.v new file mode 100644 index 000000000..122a4c491 --- /dev/null +++ b/bsg_misc/bsg_popcount_tdm.v @@ -0,0 +1,169 @@ +// Popcount TDM Generator +// Automatically generated using bsg_popcount_tdm.py +// DO NOT MODIFY +// This generator can create a popcount adder tree based on +// how many widths were specified in the main .py file + +module bsg_popcount_tdm #(parameter `BSG_INV_PARAM(width_p=4)) + (input [width_p-1:0] i + , output [$clog2(width_p+1)-1:0] o + ); + + if (width_p == 1) begin: width_1 + wire [$clog2(width_p+1)-1:0] s_o; + wire [$clog2(width_p+1)-1:0] c_o; + wire [0:0] COLUMN0; + assign COLUMN0[0] = i[0]; + assign s_o[0] = i[0]; + assign c_o[0] = 1'b0; + assign o = s_o + c_o; + // s_o[0] --> 0.0 + // c_o[0] --> 0.0 + + // max S --> 0 + // max C --> 0 + + end + + else if (width_p == 2) begin: width_1 + wire [$clog2(width_p+1)-1:0] s_o; + wire [$clog2(width_p+1)-1:0] c_o; + wire [1:0] COLUMN0; + assign COLUMN0[0] = i[0]; + assign COLUMN0[1] = i[1]; + assign s_o[0] = i[0]; + assign c_o[0] = i[1]; + wire [-1:0] COLUMN1; + assign s_o[1] = 1'b0; + assign c_o[1] = 1'b0; + assign o = s_o + c_o; + // s_o[0] --> 0.0 + // s_o[1] --> 0.0 + // c_o[0] --> 0.0 + // c_o[1] --> 0.0 + + // max S --> 0 + // max C --> 0 + + end + + else if (width_p == 4) begin: width_1 + wire [$clog2(width_p+1)-1:0] s_o; + wire [$clog2(width_p+1)-1:0] c_o; + wire [3:0] COLUMN0; + assign COLUMN0[0] = i[0]; + assign COLUMN0[1] = i[1]; + assign COLUMN0[2] = i[2]; + assign COLUMN0[3] = i[3]; + wire [0:0] t0_n; + wire [0:0] t1_n; + assign t1_n[0] = i[0] ^ i[1]; + assign s_o[0] = t1_n[0] ^ i[2]; + wire [0:0] t2_n; + assign t2_n[0] = i[0] & i[1]; + wire [0:0] t3_n; + assign t3_n[0] = i[0] & i[2]; + wire [0:0] t4_n; + assign t4_n[0] = i[1] & i[2]; + assign t0_n[0] = t2_n[0] | t3_n[0] | t4_n[0]; + assign c_o[0] = i[3]; + wire [0:0] COLUMN1; + assign COLUMN1[0] = t0_n[0]; + assign s_o[1] = t0_n[0]; + assign c_o[1] = 1'b0; + wire [-1:0] COLUMN2; + assign s_o[2] = 1'b0; + assign c_o[2] = 1'b0; + assign o = s_o + c_o; + // s_o[0] --> 2.0 + // s_o[1] --> 1.0 + // s_o[2] --> 0.0 + // c_o[0] --> 0.0 + // c_o[1] --> 0.0 + // c_o[2] --> 0.0 + + // max S --> 2.0 + // max C --> 0 + + end + + else if (width_p == 8) begin: width_1 + wire [$clog2(width_p+1)-1:0] s_o; + wire [$clog2(width_p+1)-1:0] c_o; + wire [7:0] COLUMN0; + assign COLUMN0[0] = i[0]; + assign COLUMN0[1] = i[1]; + assign COLUMN0[2] = i[2]; + assign COLUMN0[3] = i[3]; + assign COLUMN0[4] = i[4]; + assign COLUMN0[5] = i[5]; + assign COLUMN0[6] = i[6]; + assign COLUMN0[7] = i[7]; + wire [0:0] t5_n; + wire [0:0] t6_n; + wire [0:0] t7_n; + assign t7_n[0] = i[0] ^ i[1]; + assign t5_n[0] = t7_n[0] ^ i[2]; + wire [0:0] t8_n; + assign t8_n[0] = i[0] & i[1]; + wire [0:0] t9_n; + assign t9_n[0] = i[0] & i[2]; + wire [0:0] t10_n; + assign t10_n[0] = i[1] & i[2]; + assign t6_n[0] = t8_n[0] | t9_n[0] | t10_n[0]; + wire [0:0] t11_n; + wire [0:0] t12_n; + wire [0:0] t13_n; + assign t13_n[0] = i[3] ^ i[4]; + assign t11_n[0] = t13_n[0] ^ i[5]; + wire [0:0] t14_n; + assign t14_n[0] = i[3] & i[4]; + wire [0:0] t15_n; + assign t15_n[0] = i[3] & i[5]; + wire [0:0] t16_n; + assign t16_n[0] = i[4] & i[5]; + assign t12_n[0] = t14_n[0] | t15_n[0] | t16_n[0]; + wire [0:0] t17_n; + wire [0:0] t18_n; + assign t18_n[0] = i[6] ^ i[7]; + assign s_o[0] = t18_n[0] ^ t5_n[0]; + wire [0:0] t19_n; + assign t19_n[0] = i[6] & i[7]; + wire [0:0] t20_n; + assign t20_n[0] = i[6] & t5_n[0]; + wire [0:0] t21_n; + assign t21_n[0] = i[7] & t5_n[0]; + assign t17_n[0] = t19_n[0] | t20_n[0] | t21_n[0]; + assign c_o[0] = t11_n[0]; + wire [2:0] COLUMN1; + assign COLUMN1[0] = t6_n[0]; + assign COLUMN1[1] = t12_n[0]; + assign COLUMN1[2] = t17_n[0]; + wire [0:0] t22_n; + assign s_o[1] = t6_n[0] ^ t12_n[0]; + assign t22_n[0] = t6_n[0] & t12_n[0]; + assign c_o[1] = t17_n[0]; + wire [0:0] COLUMN2; + assign COLUMN2[0] = t22_n[0]; + assign s_o[2] = t22_n[0]; + assign c_o[2] = 1'b0; + wire [-1:0] COLUMN3; + assign s_o[3] = 1'b0; + assign c_o[3] = 1'b0; + assign o = s_o + c_o; + // s_o[0] --> 3.0 + // s_o[1] --> 2.0 + // s_o[2] --> 1.5 + // s_o[3] --> 0.0 + // c_o[0] --> 2.0 + // c_o[1] --> 3.0 + // c_o[2] --> 0.0 + // c_o[3] --> 0.0 + + // max S --> 3.0 + // max C --> 3.0 + + end + + +endmodule // bsg_popcount_tdm