diff --git a/entrypoint.sh b/entrypoint.sh
index ee8cf3b..6841668 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -33,7 +33,11 @@ echo "Load waarnemingen observation data via: python manage.py load_waarnemingen
 
 # Start Gunicorn
 echo "Starting Gunicorn..."
-gunicorn --workers 3 --bind 0.0.0.0:8000 vespadb.wsgi:application &
+gunicorn --workers 3 \
+    --timeout 300 \
+    --keep-alive 65 \
+    --bind 0.0.0.0:8000 \
+    vespadb.wsgi:application &
 
 # Wait for Gunicorn to start
 sleep 5
diff --git a/nginx.conf b/nginx.conf
index 61e9de4..c6b55f5 100644
--- a/nginx.conf
+++ b/nginx.conf
@@ -1,7 +1,9 @@
-worker_processes 1;
+worker_processes auto;
 
 events {
-    worker_connections 1024;
+    worker_connections 4096;
+    multi_accept on;
+    use epoll;
 }
 
 http {
@@ -15,27 +17,29 @@ http {
     http2_idle_timeout 5m;
 
     # Global timeout settings
-    proxy_connect_timeout 300;
-    proxy_send_timeout 300;
-    proxy_read_timeout 300;
-    send_timeout 300;
+    proxy_connect_timeout 600;
+    proxy_send_timeout 600;
+    proxy_read_timeout 600;
+    send_timeout 600;
+    keepalive_timeout 650;
 
     sendfile on;
-    keepalive_timeout 65;
+    tcp_nopush on;
+    tcp_nodelay on;
 
     # Buffering settings for large responses
-    proxy_buffering off;
-    proxy_request_buffering off;
-    proxy_buffer_size 16k;
-    proxy_buffers 8 16k;
-    proxy_busy_buffers_size 32k;
+    proxy_buffer_size 128k;
+    proxy_buffers 8 256k;
+    proxy_busy_buffers_size 256k;
+    proxy_temp_file_write_size 256k;
+    proxy_max_temp_file_size 0;
 
     server {
-        listen 80 http2;  # Added http2
+        listen 80 http2;
         server_name uat.vespadb.be;
 
-        # Increase client body size limit if needed
-        client_max_body_size 20M;
+        # Increase client body size limit
+        client_max_body_size 0;  # Disabled limit for large files
 
         location /static/ {
             alias /workspaces/vespadb/collected_static/;
@@ -57,22 +61,25 @@ http {
             proxy_set_header Connection "";
 
             # Timeouts
-            proxy_connect_timeout 300s;
-            proxy_send_timeout 300s;
-            proxy_read_timeout 300s;
-
-            # Buffer settings
-            proxy_buffering off;
-            proxy_request_buffering off;
+            proxy_connect_timeout 600s;
+            proxy_send_timeout 600s;
+            proxy_read_timeout 600s;
+
+            # Buffer settings for large files
+            proxy_buffering on;
+            proxy_request_buffering on;
+            proxy_buffer_size 128k;
+            proxy_buffers 8 256k;
+            proxy_busy_buffers_size 256k;
         }
     }
 
     server {
-        listen 80 http2;  # Added http2
+        listen 80 http2;
         server_name data.vespawatch.be;
 
-        # Increase client body size limit if needed
-        client_max_body_size 20M;
+        # Increase client body size limit
+        client_max_body_size 0;  # Disabled limit for large files
 
         location /static/ {
             alias /workspaces/vespadb/collected_static/;
@@ -94,13 +101,16 @@ http {
             proxy_set_header Connection "";
 
             # Timeouts
-            proxy_connect_timeout 300s;
-            proxy_send_timeout 300s;
-            proxy_read_timeout 300s;
-
-            # Buffer settings
-            proxy_buffering off;
-            proxy_request_buffering off;
+            proxy_connect_timeout 600s;
+            proxy_send_timeout 600s;
+            proxy_read_timeout 600s;
+
+            # Buffer settings for large files
+            proxy_buffering on;
+            proxy_request_buffering on;
+            proxy_buffer_size 128k;
+            proxy_buffers 8 256k;
+            proxy_busy_buffers_size 256k;
         }
     }
 }
\ No newline at end of file
diff --git a/vespadb/observations/views.py b/vespadb/observations/views.py
index 2fbca5c..e7808bc 100644
--- a/vespadb/observations/views.py
+++ b/vespadb/observations/views.py
@@ -899,15 +899,18 @@ def create_csv_generator(
 
     @method_decorator(ratelimit(key="ip", rate="60/m", method="GET", block=True))
     @action(detail=False, methods=["get"], permission_classes=[AllowAny])
-    def export(self, request: HttpRequest) -> StreamingHttpResponse:
-        """
-        Export observations as CSV using streaming response with improved error handling
-        and performance optimizations.
-        """
+    def export(self, request: HttpRequest) -> Union[FileResponse, JsonResponse]:
+        """Export observations as CSV using a temporary-file approach."""
+        temp_file = None
+        temp_file_path = None
+
         try:
-            # Validate export format
-            if request.query_params.get("export_format", "csv").lower() != "csv":
-                return JsonResponse({"error": "Only CSV export is supported"}, status=400)
+            # Create temporary file
+            temp_file = tempfile.NamedTemporaryFile(mode='w+', delete=False)
+            temp_file_path = temp_file.name
+
+            writer = csv.writer(temp_file)
+            writer.writerow(CSV_HEADERS)
 
             # Get user permissions
             if request.user.is_authenticated:
@@ -917,32 +920,84 @@ def export(self, request: HttpRequest) -> StreamingHttpResponse:
                 user_municipality_ids = set()
                 is_admin = False
 
-            # Get filtered queryset
-            queryset = self.filter_queryset(self.get_queryset())
-
-            # Create the StreamingHttpResponse
-            response = StreamingHttpResponse(
-                streaming_content=self._generate_csv_content(
-                    queryset, is_admin, user_municipality_ids
-                ),
-                content_type='text/csv'
+            # Get filtered queryset with optimizations
+            queryset = self.filter_queryset(
+                self.get_queryset().select_related('province', 'municipality', 'reserved_by')
             )
+
+            # Set a smaller chunk size for better memory management
+            chunk_size = 500
+            total_count = queryset.count()
+            processed = 0
+
+            # Process in chunks
+            for start in range(0, total_count, chunk_size):
+                chunk = queryset[start:start + chunk_size]
+
+                for observation in chunk:
+                    try:
+                        row_data = self._prepare_row_data(
+                            observation,
+                            is_admin,
+                            user_municipality_ids
+                        )
+                        writer.writerow(row_data)
+                    except Exception as e:
+                        logger.error(f"Error processing observation {observation.id}: {str(e)}")
+                        continue
+
+                processed += len(chunk)
+                logger.info(f"Export progress: {(processed/total_count)*100:.1f}%")
+
+            # Make sure all data is written and file is closed
+            temp_file.flush()
+            os.fsync(temp_file.fileno())
+            temp_file.close()
 
-            # Important headers
-            filename = f"observations_export_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
-            response['Content-Disposition'] = f'attachment; filename="{filename}"'
-            response['X-Accel-Buffering'] = 'no'
+            # Open the file for reading and create response
+            filename = f"observations_export_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
+
+            response = FileResponse(
+                open(temp_file_path, 'rb'),
+                content_type='text/csv',
+                as_attachment=True,
+                filename=filename
+            )
+            # Set headers more explicitly
+            response['Content-Disposition'] = f'attachment; filename="{filename}"; filename*=UTF-8\'\'{filename}'
+            response['Content-Type'] = 'text/csv; charset=utf-8'
+            response['Content-Length'] = os.path.getsize(temp_file_path)
             response['Cache-Control'] = 'no-cache'
+            response['X-Accel-Buffering'] = 'no'
+
+            # Schedule file cleanup after response is sent
+            def cleanup_temp_file(response: FileResponse) -> Any:
+                """Delete the temporary file once the response has been closed."""
+                try:
+                    os.unlink(temp_file_path)
+                except OSError:
+                    pass
+                return response
+
+            response.close = cleanup_temp_file.__get__(response, FileResponse)
 
             return response
 
         except Exception as e:
             logger.exception("Export failed")
+            # Cleanup in case of error
+            if temp_file:
+                temp_file.close()
+            if temp_file_path and os.path.exists(temp_file_path):
+                try:
+                    os.unlink(temp_file_path)
+                except OSError:
+                    pass
             return JsonResponse(
                 {"error": "Export failed. Please try again or contact support."},
                 status=500
             )
-
+
     def get_status(self, observation: Observation) -> str:
         """Determine observation status based on eradication data."""
         logger.debug("Getting status for observation %s", observation.eradication_result)
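
Note on the temp-file cleanup in `export`: rebinding `response.close` shadows `FileResponse.close`, which is what normally runs the response's resource closers and closes the underlying file handle, so the patched version unlinks the path but can leak the open descriptor. A minimal alternative sketch, assuming a POSIX host (an unlinked file stays readable through an already-open descriptor) and using a hypothetical helper `build_export_response` that is not part of this patch:

```python
import os

from django.http import FileResponse


def build_export_response(temp_file_path: str, filename: str) -> FileResponse:
    """Serve a finished temp file and let the OS reclaim it on close."""
    fh = open(temp_file_path, "rb")
    os.unlink(temp_file_path)  # POSIX: the data stays alive via the open fd
    response = FileResponse(
        fh,
        content_type="text/csv",
        as_attachment=True,
        filename=filename,
    )
    response["Cache-Control"] = "no-cache"
    response["X-Accel-Buffering"] = "no"  # ask nginx not to buffer the download
    return response
```

With this approach `FileResponse` closes `fh` when the response finishes and the inode is freed at that point, with no `close` override needed. On Windows the unlink would fail while the file is still open, so the explicit cleanup shown in the diff would still be required there.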