v1/docs/user-guide/troubleshooting.md
This guide provides solutions to common issues encountered when using the WiFi-DensePose system, including installation problems, hardware connectivity issues, performance optimization, and error resolution.
Run a comprehensive system health check to identify issues:
# Check system status
curl http://localhost:8000/api/v1/system/status
# Run built-in diagnostics
curl http://localhost:8000/api/v1/system/diagnostics
# Check component health
curl http://localhost:8000/api/v1/health
Check system logs for error patterns:
# View recent logs
docker-compose logs --tail=100 wifi-densepose-api
# Search for errors
docker-compose logs | grep -i error
# Check specific component logs
docker-compose logs neural-network
docker-compose logs csi-processor
Monitor system resources:
# Check Docker container resources
docker stats
# Check system resources
htop
nvidia-smi # For GPU monitoring
# Check disk space
df -h
Symptoms:
Solutions:
# Check if port 8000 is in use
netstat -tulpn | grep :8000
lsof -i :8000
# Kill process using the port
sudo kill -9 <PID>
# Add user to docker group
sudo usermod -aG docker $USER
newgrp docker
# Fix file permissions
sudo chown -R $USER:$USER .
# Update Docker Compose
sudo curl -L "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
Symptoms:
Solutions:
# Remove unused containers, networks, images
docker system prune -a
# Remove unused volumes
docker volume prune
# Check disk usage
docker system df
# Edit docker-compose.yml to use external storage
volumes:
- /external/storage/data:/app/data
- /external/storage/models:/app/models
Symptoms:
Solutions:
# Ubuntu/Debian
sudo apt update
sudo apt install -y build-essential cmake python3-dev
sudo apt install -y libopencv-dev libffi-dev libssl-dev
# CentOS/RHEL
sudo yum groupinstall -y "Development Tools"
sudo yum install -y python3-devel opencv-devel
# Create clean virtual environment
python3 -m venv venv_clean
source venv_clean/bin/activate
pip install --upgrade pip setuptools wheel
pip install -r requirements.txt
# Install PyTorch with specific CUDA version
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Or CPU-only version
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
Symptoms:
Solutions:
# Check CUDA version
nvcc --version
nvidia-smi
# Check PyTorch CUDA support
python -c "import torch; print(torch.cuda.is_available())"
# Install CUDA 11.8 (example)
wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
sudo sh cuda_11.8.0_520.61.05_linux.run
# Set GPU memory limit
export CUDA_VISIBLE_DEVICES=0
export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
Symptoms:
Solutions:
# Ping router
ping 192.168.1.1
# Check SSH access
ssh admin@192.168.1.1
# Test CSI port
telnet 192.168.1.1 5500
# SSH into router and check CSI tools
ssh admin@192.168.1.1
csi_tool --status
# Restart CSI service
/etc/init.d/csi restart
# Check iptables rules
iptables -L
# Allow CSI port
iptables -A INPUT -p tcp --dport 5500 -j ACCEPT
Symptoms:
Solutions:
# Check signal strength
iwconfig wlan0
# Analyze interference
iwlist wlan0 scan | grep -E "(ESSID|Frequency|Quality)"
# Reduce sampling rate
echo "csi_rate=20" >> /etc/config/wireless
# Change channel
echo "channel=6" >> /etc/config/wireless
uci commit wireless
wifi reload
# Check CSI data statistics
curl http://localhost:8000/api/v1/hardware/csi/stats
# View real-time quality metrics
curl http://localhost:8000/api/v1/hardware/status
Symptoms:
Solutions:
# Reduce batch size
export POSE_PROCESSING_BATCH_SIZE=16
# Lower frame rate
export STREAM_FPS=15
# Disable unnecessary features
export ENABLE_HISTORICAL_DATA=false
# Increase worker processes
export WORKERS=4
# Use process affinity
taskset -c 0-3 python -m src.api.main
Symptoms:
Solutions:
# Reduce batch size
export POSE_PROCESSING_BATCH_SIZE=8
# Enable mixed precision
export ENABLE_MIXED_PRECISION=true
# Clear GPU cache
python -c "import torch; torch.cuda.empty_cache()"
# Watch GPU memory usage
watch -n 1 nvidia-smi
# Check memory allocation
python -c "
import torch
print(f'Allocated: {torch.cuda.memory_allocated()/1024**3:.2f} GB')
print(f'Cached: {torch.cuda.memory_reserved()/1024**3:.2f} GB')
"
Symptoms:
Solutions:
# Use TensorRT optimization
export ENABLE_TENSORRT=true
# Enable model quantization
export MODEL_QUANTIZATION=int8
# Use smaller model variant
export POSE_MODEL_PATH="./models/densepose_mobile.pth"
# Increase batch size (if GPU memory allows)
export POSE_PROCESSING_BATCH_SIZE=64
# Reduce input resolution
export INPUT_RESOLUTION=256
# Skip frames for real-time processing
export FRAME_SKIP_RATIO=2
# Enable multi-threading
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
# Use multiple GPU devices
export CUDA_VISIBLE_DEVICES=0,1
Symptoms:
Solutions:
# Reduce buffer sizes
export CSI_BUFFER_SIZE=500
export STREAM_BUFFER_SIZE=50
# Limit historical data retention
export DATA_RETENTION_HOURS=24
# Enable memory mapping for large files
export USE_MEMORY_MAPPING=true
# Add swap space
sudo fallocate -l 4G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
Symptoms:
Solutions:
# Check secret key
echo $SECRET_KEY
# Verify token expiration
curl -X POST http://localhost:8000/api/v1/auth/verify \
-H "Authorization: Bearer <token>"
# Get new token
curl -X POST http://localhost:8000/api/v1/auth/token \
-H "Content-Type: application/json" \
-d '{"username": "admin", "password": "password"}'
# Ensure system time is correct
timedatectl status
sudo ntpdate -s time.nist.gov
Symptoms:
Solutions:
# Increase timeout values
export WEBSOCKET_TIMEOUT=600
export WEBSOCKET_PING_INTERVAL=30
# Enable keep-alive
export WEBSOCKET_KEEPALIVE=true
# Test WebSocket connection
wscat -c ws://localhost:8000/ws/pose
# Check proxy settings
curl -I http://localhost:8000/ws/pose
Symptoms:
Solutions:
# Increase rate limits
export RATE_LIMIT_REQUESTS=1000
export RATE_LIMIT_WINDOW=3600
# Disable rate limiting for development
export ENABLE_RATE_LIMITING=false
# Batch multiple requests
def batch_requests(requests, batch_size=10):
    for i in range(0, len(requests), batch_size):
        batch = requests[i:i+batch_size]
        # Process batch
        time.sleep(1)  # Rate limiting delay
Symptoms:
Solutions:
# Increase confidence threshold
curl -X PUT http://localhost:8000/api/v1/config \
-H "Content-Type: application/json" \
-d '{"detection": {"confidence_threshold": 0.8}}'
# Recalibrate system
curl -X POST http://localhost:8000/api/v1/system/calibrate
# Check for interference
curl http://localhost:8000/api/v1/hardware/interference
# Use domain-specific model
export POSE_MODEL_PATH="./models/healthcare_optimized.pth"
# Enable post-processing filters
export ENABLE_TEMPORAL_SMOOTHING=true
export ENABLE_OUTLIER_FILTERING=true
Symptoms:
Solutions:
# Adjust tracking thresholds
curl -X PUT http://localhost:8000/api/v1/config \
-H "Content-Type: application/json" \
-d '{
"tracking": {
"max_age": 30,
"min_hits": 3,
"iou_threshold": 0.3
}
}'
# Enable temporal smoothing
export ENABLE_TEMPORAL_SMOOTHING=true
# Use appearance features
export USE_APPEARANCE_FEATURES=true
Symptoms:
Solutions:
# PostgreSQL
sudo systemctl status postgresql
sudo -u postgres psql -c "SELECT version();"
# SQLite
ls -la ./data/wifi_densepose.db
sqlite3 ./data/wifi_densepose.db ".tables"
# Reset database connection
export DATABASE_URL="postgresql://user:password@localhost:5432/wifi_densepose"
# Restart database service
sudo systemctl restart postgresql
# Run database migrations
python -m src.database.migrate
# Reset database (WARNING: Data loss)
python -m src.database.reset --confirm
Symptoms:
Solutions:
# View crash logs
journalctl -u wifi-densepose -f
# Check for segmentation faults
dmesg | grep -i "segfault"
# Restart with Docker
docker-compose restart wifi-densepose-api
# Restart native service
sudo systemctl restart wifi-densepose
# Run with memory debugging
valgrind --tool=memcheck python -m src.api.main
# Check for memory leaks
python -X tracemalloc=25 -m src.api.main
Symptoms:
Solutions:
curl -X PUT http://localhost:8000/api/v1/config \
-H "Content-Type: application/json" \
-d '{
"alerts": {
"fall_detection": {
"sensitivity": 0.7,
"notification_delay_seconds": 10
}
}
}'
# Collect domain-specific training data
python -m src.training.collect_healthcare_data
# Retrain model with healthcare data
python -m src.training.train_healthcare_model
Symptoms:
Solutions:
# Define entrance/exit zones
curl -X PUT http://localhost:8000/api/v1/config \
-H "Content-Type: application/json" \
-d '{
"zones": {
"entrance": {
"coordinates": [[0, 0], [100, 50]],
"type": "entrance"
}
}
}'
# Enable zone-based tracking
export ENABLE_ZONE_TRACKING=true
# Adjust dwell time thresholds
export MIN_DWELL_TIME_SECONDS=5
# Profile Python code
python -m cProfile -o profile.stats -m src.api.main
# Analyze profile
python -c "
import pstats
p = pstats.Stats('profile.stats')
p.sort_stats('cumulative').print_stats(20)
"
# Profile CUDA kernels
nvprof python -m src.neural_network.inference
# Use PyTorch profiler
python -c "
import torch
with torch.profiler.profile() as prof:
# Your code here
pass
print(prof.key_averages().table())
"
# Capture CSI packets
sudo tcpdump -i eth0 port 5500 -w csi_capture.pcap
# Analyze with Wireshark
wireshark csi_capture.pcap
# Test network latency
ping -c 100 192.168.1.1 | tail -1
# Test bandwidth
iperf3 -c 192.168.1.1 -t 60
# Monitor system resources
htop
iotop
nethogs
# Monitor GPU
nvidia-smi -l 1
# Monitor Docker containers
docker stats --format "table {{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}"
# Centralized logging with ELK stack
docker run -d --name elasticsearch elasticsearch:7.17.0
docker run -d --name kibana kibana:7.17.0
# Configure log shipping
echo 'LOGGING_DRIVER=syslog' >> .env
echo 'SYSLOG_ADDRESS=tcp://localhost:514' >> .env
Before contacting support, collect the following information:
# System information
uname -a
cat /etc/os-release
docker --version
python --version
# Application logs
docker-compose logs --tail=1000 > logs.txt
# Configuration
cat .env > config.txt
curl http://localhost:8000/api/v1/system/status > status.json
# Hardware information
lscpu
free -h
nvidia-smi > gpu_info.txt
Include the following information:
Environment Details:
Steps to Reproduce:
Expected vs Actual Behavior:
Additional Context:
For critical production issues:
Immediate Actions:
# Stop the system safely
curl -X POST http://localhost:8000/api/v1/system/stop
# Backup current data
cp -r ./data ./data_backup_$(date +%Y%m%d_%H%M%S)
# Restart with minimal configuration
export MOCK_HARDWARE=true
docker-compose up -d
Rollback Procedures:
# Rollback to previous version
git checkout <previous-tag>
docker-compose down
docker-compose up -d
# Restore data backup
rm -rf ./data
cp -r ./data_backup_<timestamp> ./data
Contact Information:
Remember: Most issues can be resolved by checking logs, verifying configuration, and ensuring proper hardware setup. When in doubt, start with the basic diagnostics and work your way through the troubleshooting steps systematically.
For additional help, see: