Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "UAVCAN performance audit" #6846

Merged
merged 1 commit into from
Mar 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 4 additions & 14 deletions Debug/poor-mans-profiler.sh
Original file line number Diff line number Diff line change
@@ -1,20 +1,12 @@
#!/bin/bash
#
# Author: Pavel Kirienko <[email protected]>
#
# Poor man's sampling profiler for NuttX.
#
# Usage: Install flamegraph.pl in your PATH, configure your .gdbinit, run the script with proper arguments and go
# have a coffee. When you're back, you'll see the flamegraph. Note that frequent calls to GDB significantly
# interfere with normal operation of the target, which means that you can't profile real-time tasks with it.
# For best results, ensure that the PC is not overloaded, the USB host controller to which the debugger is
# connected is not congested. You should also allow the current user to set negative nice values.
#
# The FlameGraph script can be downloaded from https://github.com/brendangregg/FlameGraph. Thanks Mr. Gregg.
#
# Requirements: ARM GDB with Python support. You can get one by downloading the sources from
# https://launchpad.net/gcc-arm-embedded and building them with correct flags.
# Note that Python support is not required if no per-task sampling is needed.
# Requirements: ARM GDB with Python support
#

set -e
Expand All @@ -41,7 +33,7 @@ which flamegraph.pl > /dev/null || die "Install flamegraph.pl first"
nsamples=0
sleeptime=0.1 # Doctors recommend 7-8 hours a day
taskname=
elf=
elf=$root/Build/px4fmu-v2_default.build/firmware.elf
append=0
fgfontsize=10
fgwidth=1900
Expand Down Expand Up @@ -77,8 +69,6 @@ do
shift
done

[[ -z "$elf" ]] && die "Please specify the ELF file location, e.g.: build_px4fmu-v4_default/src/firmware/nuttx/firmware_nuttx"

#
# Temporary files
#
Expand Down Expand Up @@ -247,8 +237,8 @@ for s, f in sorted(stacks.items(), key=lambda (s, f): s):

print('Total stack frames:', num_stack_frames, file=sys.stderr)
print('Top consumers (distribution of the stack tops):', file=sys.stderr)
for name,num in sorted(stack_tops.items(), key=lambda (name, num): num, reverse=True)[:300]:
print('% 7.3f%% ' % (100 * num / num_stack_frames), name, file=sys.stderr)
for name,num in sorted(stack_tops.items(), key=lambda (name, num): num, reverse=True)[:10]:
print('% 5.1f%% ' % (100 * num / num_stack_frames), name, file=sys.stderr)
EOF

cat $stacksfile | python /tmp/pmpn-folder.py > $foldfile
Expand Down
1 change: 0 additions & 1 deletion ROMFS/px4fmu_common/init.d/4012_quad_x_can
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ then
param set MC_YAWRATE_D 0.0
fi

set MIXER quad_x_can
set OUTPUT_MODE uavcan_esc
1 change: 0 additions & 1 deletion ROMFS/px4fmu_common/mixers/quad_x_can.main.mix

This file was deleted.

2 changes: 1 addition & 1 deletion src/lib/matrix
19 changes: 0 additions & 19 deletions src/modules/uavcan/actuators/esc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,25 +136,6 @@ void UavcanEscController::update_outputs(float *outputs, unsigned num_outputs)
}
}

/*
* Remove channels that are always zero.
* The objective of this optimization is to avoid broadcasting multi-frame transfers when a single frame
* transfer would be enough. This is a valid optimization as the UAVCAN specification implies that all
* non-specified ESC setpoints should be considered zero.
* The positive outcome is a (marginally) lower bus traffic and lower CPU load.
*
* From the standpoint of the PX4 architecture, however, this is a hack. It should be investigated why
* the mixer returns more outputs than are actually used.
*/
for (int index = int(msg.cmd.size()) - 1; index >= _max_number_of_nonzero_outputs; index--) {
if (msg.cmd[index] != 0) {
_max_number_of_nonzero_outputs = index + 1;
break;
}
}

msg.cmd.resize(_max_number_of_nonzero_outputs);

/*
* Publish the command message to the bus
* Note that for a quadrotor it takes one CAN frame
Expand Down
1 change: 0 additions & 1 deletion src/modules/uavcan/actuators/esc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ class UavcanEscController
* ESC states
*/
uint32_t _armed_mask = 0;
uint8_t _max_number_of_nonzero_outputs = 0;

/*
* Perf counters
Expand Down
24 changes: 24 additions & 0 deletions src/modules/uavcan/uavcan_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,18 @@ UavcanNode::UavcanNode(uavcan::ICanDriver &can_driver, uavcan::ISystemClock &sys
}
/* _server_command_sem use case is a signal */
px4_sem_setprotocol(&_server_command_sem, SEM_PRIO_NONE);

if (_perfcnt_node_spin_elapsed == nullptr) {
errx(1, "uavcan: couldn't allocate _perfcnt_node_spin_elapsed");
}

if (_perfcnt_esc_mixer_output_elapsed == nullptr) {
errx(1, "uavcan: couldn't allocate _perfcnt_esc_mixer_output_elapsed");
}

if (_perfcnt_esc_mixer_total_elapsed == nullptr) {
errx(1, "uavcan: couldn't allocate _perfcnt_esc_mixer_total_elapsed");
}
}

UavcanNode::~UavcanNode()
Expand Down Expand Up @@ -152,6 +164,9 @@ UavcanNode::~UavcanNode()

_instance = nullptr;

perf_free(_perfcnt_node_spin_elapsed);
perf_free(_perfcnt_esc_mixer_output_elapsed);
perf_free(_perfcnt_esc_mixer_total_elapsed);
pthread_mutex_destroy(&_node_mutex);
px4_sem_destroy(&_server_command_sem);

Expand Down Expand Up @@ -682,6 +697,7 @@ int UavcanNode::init(uavcan::NodeID node_id)

void UavcanNode::node_spin_once()
{
perf_begin(_perfcnt_node_spin_elapsed);
const int spin_res = _node.spinOnce();

if (spin_res < 0) {
Expand All @@ -692,6 +708,8 @@ void UavcanNode::node_spin_once()
if (_tx_injector != nullptr) {
_tx_injector->injectTxFramesInto(_node);
}

perf_end(_perfcnt_node_spin_elapsed);
}

/*
Expand Down Expand Up @@ -850,8 +868,12 @@ int UavcanNode::run()
// Mutex is unlocked while the thread is blocked on IO multiplexing
(void)pthread_mutex_unlock(&_node_mutex);

perf_end(_perfcnt_esc_mixer_total_elapsed); // end goes first, it's not a mistake

const int poll_ret = ::poll(_poll_fds, _poll_fds_num, PollTimeoutMs);

perf_begin(_perfcnt_esc_mixer_total_elapsed);

(void)pthread_mutex_lock(&_node_mutex);

node_spin_once(); // Non-blocking
Expand Down Expand Up @@ -943,7 +965,9 @@ int UavcanNode::run()

// Output to the bus
_outputs.timestamp = hrt_absolute_time();
perf_begin(_perfcnt_esc_mixer_output_elapsed);
_esc_controller.update_outputs(_outputs.output, _outputs.noutputs);
perf_end(_perfcnt_esc_mixer_output_elapsed);
}


Expand Down
4 changes: 4 additions & 0 deletions src/modules/uavcan/uavcan_main.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ class UavcanNode : public device::CDev
// index into _poll_fds for each _control_subs handle
uint8_t _poll_ids[NUM_ACTUATOR_CONTROL_GROUPS_UAVCAN];

perf_counter_t _perfcnt_node_spin_elapsed = perf_alloc(PC_ELAPSED, "uavcan_node_spin_elapsed");
perf_counter_t _perfcnt_esc_mixer_output_elapsed = perf_alloc(PC_ELAPSED, "uavcan_esc_mixer_output_elapsed");
perf_counter_t _perfcnt_esc_mixer_total_elapsed = perf_alloc(PC_ELAPSED, "uavcan_esc_mixer_total_elapsed");

void handle_time_sync(const uavcan::TimerEvent &);

typedef uavcan::MethodBinder<UavcanNode *, void (UavcanNode::*)(const uavcan::TimerEvent &)> TimerCallback;
Expand Down
6 changes: 3 additions & 3 deletions src/systemcmds/tests/test_matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,9 +317,9 @@ bool MatrixTest::filterTests()

bool MatrixTest::helperTests()
{
ut_test(::fabs(wrap_pi(4.0) - (4.0 - 2 * M_PI)) < 1e-5);
ut_test(::fabs(wrap_pi(-4.0) - (-4.0 + 2 * M_PI)) < 1e-5);
ut_test(::fabs(wrap_pi(3.0) - (3.0)) < 1e-3);
ut_test(fabs(wrap_pi(4.0) - (4.0 - 2 * M_PI)) < 1e-5);
ut_test(fabs(wrap_pi(-4.0) - (-4.0 + 2 * M_PI)) < 1e-5);
ut_test(fabs(wrap_pi(3.0) - (3.0)) < 1e-3);
wrap_pi(NAN);

Vector3f a(1, 2, 3);
Expand Down