Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Failures view #662

Merged
merged 4 commits into from
Mar 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ docker::run { 'puppetboard':
}
```


We also provide the Dockerfile so you can build the image yourself:
```bash
docker build -t puppetboard .
Expand Down Expand Up @@ -161,7 +160,7 @@ Other settings that might be interesting in no particular order:
- `SECRET_KEY`: Refer to [Flask documentation](https://flask.palletsprojects.com/en/1.1.x/quickstart/#sessions),
section "How to generate good secret keys" for more info. Defaults to a random 24-char string generated by
`os.urandom(24)`.
- `PUPPETDB_TIMEOUT`: Defaults to 20 seconds but you might need to increase this value. It depends on how big the
- `PUPPETDB_TIMEOUT`: Defaults to 20 seconds, but you might need to increase this value. It depends on how big the
results are when querying PuppetDB. This behaviour will change in a future release when pagination will be introduced.
- `UNRESPONSIVE_HOURS`: The amount of hours since the last check-in after which a node is considered unresponsive.
- `LOGLEVEL`: A string representing the loglevel. It defaults to `'info'` but can be changed to `'warning'` or
Expand Down Expand Up @@ -200,7 +199,7 @@ Other settings that might be interesting in no particular order:
to `False` gives performance benefits, especially in big Puppet environments (more than a few hundred nodes).
Defaults to `True`.
- `DEV_LISTEN_HOST`: For use with dev.py for development. Default is localhost
- `DEV_LISTEN_PORT`: For use with dev.py for development. Default is 5000
- `DEV_LISTEN_PORT`: For use with dev.py for development. Default is 5555

## Getting Help

Expand Down
5 changes: 4 additions & 1 deletion puppetboard/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import puppetboard.views.radiator # noqa: F401
# noinspection PyUnresolvedReferences
import puppetboard.views.reports # noqa: F401
# noinspection PyUnresolvedReferences
import puppetboard.views.failures # noqa: F401


from puppetboard.core import get_app, get_puppetdb
Expand All @@ -41,14 +43,15 @@

menu_entries = [
('index', 'Overview'),
('failures', 'Failures'),
('nodes', 'Nodes'),
('facts', 'Facts'),
('reports', 'Reports'),
('metrics', 'Metrics'),
('inventory', 'Inventory'),
('catalogs', 'Catalogs'),
('radiator', 'Radiator'),
('query', 'Query')
('query', 'Query'),
]

if not app.config.get('ENABLE_QUERY'):
Expand Down
2 changes: 2 additions & 0 deletions puppetboard/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ def environments():
return x


# as documented in
# https://flask.palletsprojects.com/en/2.0.x/patterns/streaming/#streaming-from-templates
def stream_template(template_name, **context):
app = get_app()
app.update_template_context(context)
Expand Down
5 changes: 4 additions & 1 deletion puppetboard/default_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
DEFAULT_ENVIRONMENT = 'production'
SECRET_KEY = os.urandom(24)
DEV_LISTEN_HOST = '127.0.0.1'
DEV_LISTEN_PORT = 5000
DEV_LISTEN_PORT = 5555
DEV_COFFEE_LOCATION = 'coffee'
UNRESPONSIVE_HOURS = 2
ENABLE_QUERY = True
Expand Down Expand Up @@ -53,3 +53,6 @@
DAILY_REPORTS_CHART_ENABLED = True
DAILY_REPORTS_CHART_DAYS = 8
WITH_EVENT_NUMBERS = True

SHOW_ERROR_AS = 'friendly' # or 'raw'
CODE_PREFIX_TO_REMOVE = '/etc/puppetlabs/code/environments'
5 changes: 4 additions & 1 deletion puppetboard/docker_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def coerce_bool(v, default):
DEFAULT_ENVIRONMENT = os.getenv('DEFAULT_ENVIRONMENT', 'production')
SECRET_KEY = os.getenv('SECRET_KEY', os.urandom(24))
DEV_LISTEN_HOST = os.getenv('DEV_LISTEN_HOST', '127.0.0.1')
DEV_LISTEN_PORT = int(os.getenv('DEV_LISTEN_PORT', '5000'))
DEV_LISTEN_PORT = int(os.getenv('DEV_LISTEN_PORT', '5555'))
DEV_COFFEE_LOCATION = os.getenv('DEV_COFFEE_LOCATION', 'coffee')
UNRESPONSIVE_HOURS = int(os.getenv('UNRESPONSIVE_HOURS', '2'))
ENABLE_QUERY = os.getenv('ENABLE_QUERY', 'True')
Expand Down Expand Up @@ -125,3 +125,6 @@ def coerce_bool(v, default):
DAILY_REPORTS_CHART_DAYS = int(os.getenv('DAILY_REPORTS_CHART_DAYS', '8'))

WITH_EVENT_NUMBERS = coerce_bool(os.getenv('WITH_EVENT_NUMBERS'), True)

SHOW_ERROR_AS = os.getenv('SHOW_ERROR_AS', 'friendly')
CODE_PREFIX_TO_REMOVE = os.getenv('CODE_PREFIX_TO_REMOVE', '/etc/puppetlabs/code/environments')
3 changes: 2 additions & 1 deletion puppetboard/static/css/puppetboard.css
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,8 @@ a .key:hover {

textarea, /* query */
pre.result,
pre.error_message {
pre.error_message,
td.error_message {
font-family: "Cousine", monospace;
font-size: 1rem;
}
Expand Down
56 changes: 56 additions & 0 deletions puppetboard/templates/failures.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{% extends 'layout.html' %}
{# Failures view: one row per node passed in `failures`, with a toggle that
   switches between the "friendly" and "raw" renderings of the error. #}
{% import '_macros.html' as macros %}
{% block content %}
<div class="ui fluid icon input hide" style="margin-bottom:20px">
  <input autofocus="autofocus" class="filter-table" placeholder="Type here to filter...">
</div>
<div id="checkbox_div" class="ui toggle {% if (current_show_error_as == 'friendly') %}checked{% endif %} checkbox">
  <input type="checkbox" {% if (current_show_error_as == 'friendly') %}checked=""{% endif %}>
  <label>Friendly error messages</label>
</div>
<table id="failures_table" class='ui basic vertically padded sortable fixed wrapped table'>
  <thead>
    <tr>
      <th class="default default-sort four wide">Certname</th>
      <th class="date two wide">Report time</th>
      <th class="date ten wide">Error</th>
    </tr>
  </thead>
  <tbody class="searchable">
    {% for failure in failures %}
    <tr>
      <td>
        <a href="{{url_for('node', env=current_env, node_name=failure.certname)}}">
          {{failure.certname}}
        </a>
      </td>
      <td>
        <a href="{{url_for('report', env=current_env, node_name=failure.certname, report_id=failure.report_hash)}}" rel="utctimestamp">
          {{failure.timestamp}}
        </a>
      </td>
      <td class="negative error_message">
        {# The view supplies this value as ready-made HTML, hence `safe`. #}
        {{failure.error | safe}}
      </td>
    </tr>
    {% endfor %}
  </tbody>
</table>

<script>
  // Toggling the checkbox reloads the page with the other rendering mode,
  // which is selected by the last path segment of the URL.
  $('#checkbox_div').checkbox(
    'setting', 'onChange', function() {
      let url = window.location.href;
      if (url.endsWith("/raw")) {
        url = url.replace(/\/raw$/, '/friendly')
      } else if (url.endsWith("/friendly")) {
        url = url.replace(/\/friendly$/, '/raw')
      } else {
        url += '/raw'
      }
      window.location.href = url
    }
  );
</script>

{% endblock content %}
144 changes: 144 additions & 0 deletions puppetboard/views/failures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import re

from flask import Response, stream_with_context, abort
from pypuppetdb.QueryBuilder import AndOperator, EqualsOperator

from puppetboard.core import get_app, get_puppetdb, environments, stream_template
from puppetboard.utils import check_env, yield_or_stop

# Module-level handles obtained from puppetboard.core: `app` is used below for
# route registration and configuration lookups, `puppetdb` for the node and
# report queries.
app = get_app()
puppetdb = get_puppetdb()


def get_raw_error(source: str, message: str) -> str:
    """Return the unmodified log message as an HTML fragment.

    The result is inserted into the page without further escaping, so the
    text is HTML-escaped here before any markup is added.

    :param source: the ``source`` field of the log entry
    :param message: the ``message`` field of the log entry
    :return: escaped text, wrapped in ``<pre>`` when it spans several lines
    """
    from html import escape

    # prefix with source, if it's not trivial
    if source != 'Puppet':
        message = source + "\n\n" + message

    # neutralise any markup contained in the log text itself
    message = escape(message)

    if '\n' in message:
        message = f"<pre>{message}</pre>"

    return message


def get_friendly_error(source: str, message: str, certname: str,
                       code_prefix_to_remove=None) -> str:
    """Shorten and reflow a Puppet error message for easier reading.

    The result is still plain text (with ``\\n`` line breaks).

    :param source: the ``source`` field of the log entry
    :param message: the ``message`` field of the log entry
    :param certname: name of the node the report belongs to; redundant
        mentions of it are dropped from the message
    :param code_prefix_to_remove: path prefix stripped from ``file: ...``
        locations; defaults to the ``CODE_PREFIX_TO_REMOVE`` app setting
    :return: the rewritten message
    """
    # NOTE: the order of the below operations matters in some cases!

    if code_prefix_to_remove is None:
        code_prefix_to_remove = app.config['CODE_PREFIX_TO_REMOVE']

    # prefix with source, if it's not trivial
    if source != 'Puppet':
        message = source + "\n\n" + message

    # shorten the file paths; the prefix is matched literally so that paths
    # containing regex metacharacters are handled correctly
    message = message.replace(f'file: {code_prefix_to_remove}', 'file: …')

    # remove some unuseful parts
    too_long_prefix = "Could not retrieve catalog from remote server: " \
                      "Error 500 on SERVER: " \
                      "Server Error: "
    if message.startswith(too_long_prefix):
        message = message[len(too_long_prefix):]

    message = message.replace("Evaluation Error: Error while evaluating a ",
                              "Error while evaluating a ")

    # remove redundant certnames; the certname is escaped because its dots
    # would otherwise match any character
    message = re.sub(re.escape(f" on node {certname}") + r"$", '', message)
    message = message.replace(f" for {certname} ", ' ')

    # add extra line breaks for readability
    message = re.sub(r"(Error while evaluating a .*?),",
                     r"\1:\n\n", message)

    message = re.sub(r"( returned \d+:) ",
                     r"\1\n\n", message)

    # reformat and rephrase ending expression that says where in the code is the error
    # NOTE: this has to be done AFTER removing " on node ..."
    # but BEFORE replacing spaces with &nbsp;
    message = re.sub(r"(\S)\s+\(file: ([0-9a-zA-Z/_\-.…]+, line: \d+, column: \d+)\)\s*$",
                     r"\1\n\n…in \2.", message)

    message = re.sub(r"(\S)\s+\(file: ([0-9a-zA-Z/_\-.…]+, line: \d+)\)\s*$",
                     r"\1\n\n…in \2.", message)

    return message


def to_html(message: str) -> str:
    """Convert a plain-text error message into an HTML fragment.

    The text is HTML-escaped first, because the result is inserted into the
    page without further escaping; only the markup added here is live.

    :param message: plain-text message using ``\\n`` for line breaks
    :return: HTML with ``<br/>`` line breaks and non-breaking code locations
    """
    from html import escape

    # neutralise any markup contained in the message itself
    message = escape(message)

    # replace \n with <br/> to not have to use <pre> which breaks wrapping
    message = message.replace("\n", "<br/>")

    # prevent line breaking inside expressions that provide code location
    message = re.sub(r"\(file: (.*?), line: (.*?), column: (.*?)\)",
                     r"(file:&nbsp;\1,&nbsp;line:&nbsp;\2,&nbsp;column:&nbsp;\3)", message)
    message = re.sub(r"\(file: (.*?), line: (.*?)\)",
                     r"(file:&nbsp;\1,&nbsp;line:&nbsp;\2)", message)

    return message


def _first_error_log(logs):
    """Return ``(source, message)`` of the first relevant log entry, or None.

    An entry is relevant when its level is not info/notice/warning and its
    source is not Facter.
    """
    for log in logs:
        if log['level'] in ('info', 'notice', 'warning'):
            continue
        if log['source'] == 'Facter':
            continue
        return log['source'], log['message']
    return None


@app.route('/failures', defaults={'env': app.config['DEFAULT_ENVIRONMENT'],
                                  'show_error_as': app.config['SHOW_ERROR_AS']})
@app.route('/failures/<show_error_as>', defaults={'env': app.config['DEFAULT_ENVIRONMENT']})
@app.route('/<env>/failures', defaults={'show_error_as': app.config['SHOW_ERROR_AS']})
@app.route('/<env>/failures/<show_error_as>')
def failures(env: str, show_error_as: str):
    """List the nodes whose latest report failed, with the first error of each.

    :param env: environment to restrict the nodes to, or ``'*'`` for all
    :param show_error_as: ``'friendly'`` or ``'raw'``; anything else is a 404
    :return: a streamed rendering of the ``failures.html`` template
    """
    nodes_query = AndOperator()
    nodes_query.add(EqualsOperator('latest_report_status', 'failed'))

    envs = environments()
    check_env(env, envs)
    if env != '*':
        nodes_query.add(EqualsOperator("catalog_environment", env))

    if show_error_as not in ['friendly', 'raw']:
        abort(404)

    nodes = puppetdb.nodes(
        query=nodes_query,
        with_status=True,
        with_event_numbers=False,
    )

    failure_rows = []

    for node in yield_or_stop(nodes):

        report_query = AndOperator()
        report_query.add(EqualsOperator('hash', node.latest_report_hash))

        reports = puppetdb.reports(
            query=report_query,
        )

        # The report may be missing by the time we ask for it; do not let
        # that abort the whole page.
        latest_failed_report = next(reports, None)

        found = None
        if latest_failed_report is not None:
            found = _first_error_log(latest_failed_report.logs)

        if found is None:
            # Without this branch the values of the previous node would be
            # reused (or a NameError raised for the first node).
            error = 'No error message found in the latest report.'
        else:
            source, message = found
            if show_error_as == 'friendly':
                error = to_html(get_friendly_error(source, message, node.name))
            else:
                error = get_raw_error(source, message)

        failure_rows.append({
            'certname': node.name,
            'timestamp': node.report_timestamp,
            'error': error,
            'report_hash': node.latest_report_hash,
        })

    return Response(stream_with_context(
        stream_template('failures.html',
                        failures=failure_rows,
                        envs=envs,
                        current_env=env,
                        current_show_error_as=show_error_as)))