Accelerated Arithmetic Operations with APM8S007 MDU

Geehy

Many individuals engaged in learning microcontrollers and embedded system development today often start with Arm-based 32-bit microcontrollers (MCUs). Consequently, for many people, 8-bit MCUs are considered a thing of the past and gradually fading from view. However, this perception is not entirely accurate. Data indicates that in the global MCU market in 2023, although 32-bit MCUs held the leading position with a market share of 55%, the “veteran warrior” 8-bit MCUs remain highly competitive, accounting for a significant market share of 43% and showing no signs of retreat.

For the 8051 core, multiplication and division can be time-consuming, but using the MDU significantly speeds up these operations and expands the range of applications for 8051-based chips.

The APM8S007 is equipped with an MDU unit, which enables it to perform the following operations:

Overview of Functions:

Supports signed/unsigned arithmetic calculations
Supports 32-bit/16-bit division and 16-bit/16-bit division
Supports 16-bit x 16-bit multiplication
Supports 16-bit + 16-bit addition
Supports 16-bit - 16-bit subtraction
Supports 32-bit hardware accelerated square root

Computational Performance:
Assuming a system clock of 48 MHz, each clock cycle lasts 1/48 MHz ≈ 20.8333 ns. The following table lists the calculation time for each operation at this clock rate:

Operation Type Clock Cycles Required Time (us)

32/16 32 0.67

Square root 43 0.90

16-16 10 0.21

32-32 14 0.29

16+16 10 0.21

16×16 21 0.44

16/16 21 0.44

Shift 10 0.21

Note: The above data represents estimated times, indicating the time required from the start to the end of the enabled operation. There may be some margin of error. Aligning operand addresses and result addresses to 2-byte boundaries can save time for internal DMA within the module, thereby achieving maximum performance.

CLK_CON2 |= CLK_MDU_EN(0x1);   // Enable MDU module clock
// Unsigned 32-bit/16-bit division, result is 48-bit: 0~31 bits for the quotient, 32~47 bits for the remainder
u32_data1 = 8712;           // Dividend
u16_data2 = 99;             // Divisor
u32_data3 = 0;              // Result (8712/99 = 88)
mdu_register(UN_DIV_32_16, 0, (u16)&u32_data1, (u16)&u16_data2, (u16)&u48_data3);
printf("UN_DIV_32_16: %ld/%d = %ld\r\n", u32_data1, u16_data2, *((u32 *)&u48_data3));
// Signed 32-bit/16-bit division, result is 48-bit: 0~31 bits for the quotient, 32~47 bits for the remainder
s32_data1 = -8712;          // Dividend
s16_data2 = 99;             // Divisor
u48_data3[0] = 0;           // Result (-8712/99 = -88)
u48_data3[1] = 0;
u48_data3[2] = 0;
mdu_register(DIV_32_16, 0, (u16)&s32_data1, (u16)&s16_data2, (u16)&u48_data3);
printf("   DIV_32_16: %ld/%d = %ld\r\n", s32_data1, s16_data2, *((s32 *)&u48_data3));
// Unsigned 16-bit/16-bit division, result is 32-bit: 0~15 bits for the quotient, 16~31 bits for the remainder
u16_data1 = 8712;           // Dividend
u16_data2 = 99;             // Divisor
u32_data3 = 0;              // Result (8712/99 = 88)
mdu_register(UN_DIV_16_16, 0, (u16)&u16_data1, (u16)&u16_data2, (u16)&u32_data3);
printf("UN_DIV_16_16: %d/%d = %d\r\n", u16_data1, u16_data2, *((u16 *)&u32_data3));
// Unsigned 16-bit * 16-bit
u16_data1 = 88;             // Multiplicand
u16_data2 = 99;             // Multiplier
u32_data3 = 0;              // Result (88 * 99 = 8712)
mdu_register(UN_MUL_16_16, 0, (u16)&u16_data1, (u16)&u16_data2, (u16)&u32_data3);
printf("UN_MUL_16_16: %d * %d = %ld\r\n", u16_data1, u16_data2, u32_data3);
// Signed 16-bit * 16-bit
u16_data1 = -88;            // Multiplicand
u16_data2 = 99;             // Multiplier
s32_data3 = 0;              // Result (-88 * 99 = -8712)
mdu_register(MUL_16_16, 0, (u16)&u16_data1, (u16)&u16_data2, (u16)&s32_data3);
printf("   MUL_16_16: %d * %d = %ld\r\n", u16_data1, u16_data2, s32_data3);
// Unsigned 16-bit - 16-bit
u16_data1 = 8712;           // Minuend
u16_data2 = 99;             // Subtrahend
u16_data3 = 0;              // Result (8712 - 99 = 8613)
mdu_register(UN_SUB_16_16, 0, (u16)&u16_data1, (u16)&u16_data2, (u16)&u16_data3);
printf("UN_SUB_16_16: %d - %d = %d\r\n", u16_data1, u16_data2, u16_data3);
// Signed 16-bit - 16-bit
s16_data1 = -8712;          // Minuend
s16_data2 = 99;             // Subtrahend
u32_data3 = 0;              // Result (-8712 - 99 = -8811)
mdu_register(DIV_16_16, 0, (u16)&s16_data1, (u16)&s16_data2, (u16)&u32_data3);
printf("   DIV_16_16: %d / %d = %d\r\n", s16_data1, s16_data2, *((s16 *)&u32_data3));
// Unsigned 32-bit - 32-bit
u32_data1 = 8712;           // Minuend
u32_data2 = 99;             // Subtrahend
u32_data3 = 0;              // Result (8712 - 99 = 8613)
mdu_register(UN_SUB_32_32, 0, (u16)&u32_data1, (u16)&u32_data2, (u16)&u32_data3);
printf("UN_SUB_32_32: %ld - %ld = %ld\r\n", u32_data1, u32_data2, u32_data3);
// Signed 32-bit - 32-bit
s32_data1 = -8712;          // Minuend
s32_data2 = 99;             // Subtrahend
s32_data3 = 0;              // Result (-8712 - 99 = 8811)
mdu_register(SUB_32_32, 0, (u16)&s32_data1, (u16)&s32_data2, (u16)&s32_data3);
printf("   SUB_32_32: %ld - %ld = %ld\r\n", s32_data1, s32_data2, s32_data3);
// Unsigned 16-bit + 16-bit
u16_data1 = 8712;           // Addend
u16_data2 = 99;             // Addend
u16_data3 = 0;              // Result (8712 + 99 = 8811)
mdu_register(UN_ADD_16_16, 0, (u16)&u16_data1, (u16)&u16_data2, (u16)&u16_data3);
printf("UN_ADD_16_16: %d + %d = %d\r\n", u16_data1, u16_data2, u16_data3);
// Signed 16-bit + 16-bit
s16_data1 = -8712;          // Addend
s16_data2 = 99;             // Addend
s16_data3 = 0;              // Result (-8712+99 = -8613)
mdu_register(ADD_16_16, 0, (u16)&s16_data1, (u16)&s16_data2, (u16)&s16_data3);
printf("   ADD_16_16: %d+%d = %d\r\n", s16_data1, s16_data2, s16_data3);
// Unsigned Left Shift <<
u32_data1 = 10000;          // Number to be shifted
shift_val = 3;              // Shift value
u32_data3 = 0;              // Result (10000<<3 = 80000)
mdu_register(UN_SHIFT, MDU_SIFT_SEL(0x0) | MDU_SIFT_NUM(shift_val), (u16)&u32_data1, 0, (u16)&u32_data3);
printf("    UN_SHIFT: %ld<<%bd = %ld\r\n", u32_data1, shift_val, u32_data3);
// Signed Left Shift <<
s32_data1 = -10000;         // Number to be shifted
shift_val = 3;              // Shift value
s32_data3 = 0;              // Result (-10000<<3 = -80000)
mdu_register(SHIFT, MDU_SIFT_SEL(0x0) | MDU_SIFT_NUM(shift_val), (u16)&s32_data1, 0, (u16)&s32_data3);
printf("      SHIFT: %ld<<%bd = %ld\r\n", s32_data1, shift_val, s32_data3);
// Unsigned Right Shift >>
u32_data1 = 10000;          // Number to be shifted
shift_val = 3;              // Shift value
u32_data3 = 0;              // Result (10000>>3 = 1250)
mdu_register(UN_SHIFT, MDU_SIFT_SEL(0x1) | MDU_SIFT_NUM(shift_val), (u16)&u32_data1, 0, (u16)&u32_data3);
printf("    UN_SHIFT: %ld>>%bd = %ld\r\n", u32_data1, shift_val, u32_data3);
// Signed Right Shift >>
s32_data1 = -10000;         // Number to be shifted
shift_val = 3;              // Shift value
s32_data3 = 0;              // Result (-10000>>3 = -1250)
mdu_register(SHIFT, MDU_SIFT_SEL(0x1) | MDU_SIFT_NUM(shift_val), (u16)&s32_data1, 0, (u16)&s32_data3);
printf("      SHIFT: %ld>>%bd = %ld\r\n", s32_data1, shift_val, s32_data3);
// Unsigned 16-bit * 16-bit <<
u16_data1 = 88;             // Multiplicand
u16_data2 = 99;             // Multiplier
shift_val = 3;              // Shift value
u32_data3 = 0;              // Result (88*99<<3 = 69696)
mdu_register(UN_MUL_16_16, MDU_SIFT_SEL(0x0) | MDU_SIFT_NUM(shift_val), (u16)&u16_data1, (u16)&u16_data2, (u16)&u32_data3);
printf("UN_MUL_16_16_SHIFT: %d*%d<<%bd = %ld\r\n", u16_data1, u16_data2, shift_val, u32_data3);
// Unsigned 16-bit * 16-bit >>
u16_data1 = 88;             // Multiplicand
u16_data2 = 99;             // Multiplier
shift_val = 3;              // Shift value
u32_data3 = 0;              // Result (88*99>>3 = 1089)
mdu_register(UN_MUL_16_16, MDU_SIFT_SEL(0x1) | MDU_SIFT_NUM(shift_val), (u16)&u16_data1, (u16)&u16_data2, (u16)&u32_data3);
printf("   MUL_16_16_SHIFT: %d*%d>>%bd = %ld\r\n", u16_data1, u16_data2, shift_val, u32_data3);
// Square Root
u32_data1 = 9801;           // Number
u16_data3 = 0;              // Result (SQRT(8712)= 99)
mdu_register(UN_SQRT, 0, (u16)&u32_data1, 0, (u16)&u16_data3);
printf("     UN_SQRT: SQRT(%ld) = %d\r\n", u32_data1, u16_data3);

This unit accelerates multiplication and division operations, allowing more applications to be implemented on an 8051-based chip, thereby reducing overall cost and maximizing resource utilization.