- Go to the datasets directory and check its contents:
cd training/datasets
ls -l
- Print the file to the screen.
cat birdstrikes.csv
- Explore the csv
less birdstrikes.csv
- Print the first 20 lines of the file to the screen
head -n 20 birdstrikes.csv
- What is -n? Check it in the manual
man head
- Check the last 10 lines of the file
tail -n 10 birdstrikes.csv
- Exercise: check the last line of the file
tail -n 1 birdstrikes.csv
- Put the first 10 lines into another file
head -n 10 birdstrikes.csv > first10.csv
- Exercise: show the 10th line of the csv.
tail -1 first10.csv
- Exercise: show the 5th line of the csv.
head -n 5 birdstrikes.csv > first5.csv
tail -1 first5.csv
- we can do this with 1 command
head -n 5 birdstrikes.csv | tail -n 1
- Exercise: put the 5th line into the 5thline.csv
head -n 5 birdstrikes.csv | tail -n 1 > 5thline.csv
- Only show incidents from California
cat birdstrikes.csv | grep California
grep -v
- Only show incidents NOT with Airplanes
cat birdstrikes.csv | grep -v Airplane
- Exercise: show the first 3 Helicopter incidents NOT in Colorado
cat birdstrikes.csv | grep Helicopter | grep -v Colorado | head -3
grep -i
- Ignore case
cat birdstrikes.csv | grep -i airplane
Regular expressions
- Check lines that contain a dot (.)
cat birdstrikes.csv | grep '.'
cat birdstrikes.csv | grep '\.'
cat birdstrikes.csv | grep -F '\.'
More Regular expressions
- Check lines that match at the start (^) or at the end ($) of the line
cat birdstrikes.csv | grep -E '^1'
cat birdstrikes.csv | grep -E ',0$'
cat birdstrikes.csv | grep -E '^[^,]'
cat birdstrikes.csv | cut -d, -f5 | grep -E '^$'
- Exercise: List those lines that don't start with 1
cat birdstrikes.csv | grep -E '^[^1]'
cat birdstrikes.csv | grep -v '^1'
- Exercise: List those lines that don't have empty values
cat birdstrikes.csv | grep -v ',,' | grep -vE '^,' | grep -vE ',$'
- show the line, word and character count of birdstrikes
wc birdstrikes.csv
- Exercise: show the word, line and character count of the first 10 lines
head -n 10 birdstrikes.csv | wc
- Exercise: how many incidents were in California (only output line count)
cat birdstrikes.csv | grep California | wc -l
- Display only the aircraft and the flight_date columns
cat birdstrikes.csv | cut -d, -f2,3
- Exercise: display only the state and the bird size columns of Airplane accidents
cat birdstrikes.csv | grep Airplane | cut -d, -f5,9
- Exercise: How many incidents had a cost greater than $0?
cat birdstrikes.csv | cut -d, -f10 | grep -v '^0$' | wc -l
- Sort this file
sort birdstrikes.csv
sort -k -t
- Sort by feet above ground, high values first
cat birdstrikes.csv | sort -k11 -t, -n -r | less
- Exercise: Which was the most expensive incident?
cat birdstrikes.csv | sort -k10 -t, -n | tail -1
- Exercise: In which Area did the most expensive incident happen that was caused by a Small bird?
cat birdstrikes.csv | sort -t, -k10 -n | grep Small | cut -d, -f6 | tail -1
sort | uniq
- What kind of bird sizes are there?
cat birdstrikes.csv | cut -d, -f9 | sort | uniq
- Exercise: In how many states did accidents happen?
cat birdstrikes.csv | cut -d, -f6 | sort | uniq | wc -l
uniq -c
- How many incidents were there by state?
cat birdstrikes.csv | cut -d, -f6 | sort | uniq -c
- Exercise: How many incidents were there by airline? Output should be sorted by airline
cat birdstrikes.csv | cut -d, -f5 | sort | uniq -c | sort -t' ' -k2 -n
- Sum costs
cat birdstrikes.csv | cut -d, -f10 | awk 'BEGIN { s=0; } { s = s + $1; } END {print s; }'
- Exercise - Get the number of lines with awk
cat birdstrikes.csv | awk 'BEGIN { s=0; } { s = s + 1 } END {print s; }'
awk NF
- Is the csv well formatted? Get the number of columns in each line:
cat birdstrikes.csv | awk -F, '{ print NF }' | sort | uniq -c
- Replace things
sed 's/,/;/g'
sed '1d' # deletes the header
* `awk` - Sum the costs (column 10) grouped by column 5
cat birdstrikes.csv | cut -d, -f5,10 | sed 1d | sort -t, | awk -F, 'BEGIN { last="";sum=0; } { if (last != $1){ if (last != ""){ print last","sum;} last = $1;sum=0;} sum = sum + $2} END{print last","sum;}'
* `bash script` -
Write a script that gets the first column
#mcedit firstcolumn
cut -d, -f1
* `bash` -
script parameters
echo $1
cat
* `$()` - Create stat script
mcedit stats
echo header
* *Exercise*:
Create a script that prints:
- Nr of lines
- First 10 lines
- Last 10 lines
- number of empty lines