-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Additionally amended the .gitignore file for macintosh file systems
- Loading branch information
Showing
6 changed files
with
477 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
library('seqHMM') | ||
library('TraMineR') | ||
library('seqinr') | ||
########### gympie civil dawn | ||
#read in the sequence from the fasta file | ||
fastacd2gympie = readBStringSet('/Volumes/Nifty/QUT/Fastafiles/Gympie_letters_civildawn.txt', format="fasta", | ||
nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE) | ||
#Create sequences of equal length, therefore remove the last day because it is significantly shorter | ||
#And remove the last one/two minutes of each sequence so they are all equal | ||
fastacd2gympie = fastacd2gympie[1:397] | ||
gympiecd = subseq(fastacd2gympie, 1, 1438) | ||
|
||
#Create a state sequence object that can be used to build a HMM | ||
stateseqgympiecd = seqdef(gympiecd,id = names(gympiecd), missing = '-') | ||
|
||
#Initialize the HMM with 7 different states representing rain, birds, etc.. | ||
init_hmm_gympiecd = build_hmm(observations=stateseqgympiecd, n_states=7) | ||
|
||
#Fit the model to the given observations/sequences | ||
hmm_gympiecd = fit_model(init_hmm_gympiecd) | ||
|
||
#Calculate the hidden state path sequence for each day | ||
hidpaths_gympiecd = hidden_paths(hmm_gympiecd$model) | ||
|
||
#Plot out the hidden state path sequences | ||
jpeg("/Volumes/Nifty/QUT/HMM/hiddenstates/gympiecd.jpeg", 2000, 2000, quality = 100) | ||
ssplot(hmm_gympiecd$model, hidpaths_gympiecd,plots="hidden.paths", type = "I") | ||
dev.off() | ||
|
||
#Show the state transition probabilities and emission probabilties in the form of a graph | ||
jpeg("/Volumes/Nifty/QUT/HMM/statetransitiongympiecd.jpeg", 1000, 1000, quality = 100) | ||
plot(hmm_gympiecd$model,vertex.label='names',combine.slices = 0.05, cpal = colorpalette[[50]]) | ||
dev.off() | ||
|
||
########### woondum civil dawn | ||
fastacd2woondum = readBStringSet('/Volumes/Nifty/QUT/Fastafiles/Woondum_letters_civildawn.txt', format="fasta", | ||
nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE) | ||
fastacd2woondum = fastacd2woondum[1:397] | ||
woondumcd = subseq(fastacd2woondum, 1, 1438) | ||
stateseqwoondumcd = seqdef(woondumcd,id = names(woondumcd), missing = '-') | ||
init_hmm_woondumcd = build_hmm(observations=stateseqwoondumcd, n_states=7) | ||
hmm_woondumcd = fit_model(init_hmm_woondumcd) | ||
hidpaths_woondumcd = hidden_paths(hmm_woondumcd$model) | ||
|
||
jpeg("/Volumes/Nifty/QUT/HMM/hiddenstates/woondumcd.jpeg", 2000, 2000, quality = 100) | ||
ssplot(hmm_woondumcd$model, hidpaths_woondumcd,plots="hidden.paths", type = "I") | ||
dev.off() | ||
|
||
jpeg("/Volumes/Nifty/QUT/HMM/statetransitionwoondumcd.jpeg", 1000, 1000, quality = 100) | ||
plot(hmm_woondumcd$model,vertex.label='names',combine.slices = 0.05, cpal = colorpalette[[50]]) | ||
dev.off() | ||
|
||
########### gympie midnight | ||
fastamn2gympie = readBStringSet('/Volumes/Nifty/QUT/Fastafiles/Gympie_letters_midnight.txt', format="fasta", | ||
nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE) | ||
|
||
########### woondum midnight | ||
fastamn2woondum = readBStringSet('/Volumes/Nifty/QUT/Fastafiles/Woondum_letters_midnight.txt', format="fasta", | ||
nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE) | ||
stateseqwoondum = seqdef(fastamn2woondum,id = names(fastamn2woondum), missing = '-') | ||
init_hmm_woondummn = build_hmm(observations=stateseqwoondum, n_states=7) | ||
hmm_woondummn = fit_model(init_hmm_woondummn) | ||
hidpaths_woondummn = hidden_paths(hmm_woondummn$model) | ||
|
||
#two channels, one for each location civildawn | ||
stateseqs = list(stateseqgympiecd,stateseqwoondumcd) | ||
init_hmm_bothcd = build_hmm(observations=stateseqs, n_states = 7, channel_names = c("Gympie","Woondum")) | ||
hmm_bothcd = fit_model(init_hmm_bothcd) | ||
hidpaths_bothcd = hidden_paths(hmm_bothcd$model) | ||
|
||
jpeg("/Volumes/Nifty/QUT/HMM/hiddenstates/bothcd.jpeg", 2000, 2000, quality = 100) | ||
ssplot(hmm_bothcd$model, hidpaths_bothcd,plots="hidden.paths", type = "I") | ||
dev.off() | ||
|
||
jpeg("/Volumes/Nifty/QUT/HMM/bothcdobservationss.jpeg", 2000, 2000, quality = 100) | ||
ssplot(hmm_bothcd$model, hidpaths_bothcd,plots="obs", type = "I", cpal = colorpalette[[50]]) | ||
dev.off() | ||
|
||
jpeg("/Volumes/Nifty/QUT/HMM/statetransitionbothcd.jpeg", 1000, 1000, quality = 100) | ||
plot(hmm_woondumcd$model,vertex.label='names',combine.slices = 0.05, cpal = colorpalette[[50]]) | ||
dev.off() | ||
|
||
#datasets of gympie and woondum combined civil dawn | ||
combcd = append(gympiecd, woondumcd) | ||
stateseqscd = seqdef(combcd,id = names(combcd), missing = '-', cpal = colorpalette[[50]]) | ||
init_hmm_combcd = build_hmm(observations=stateseqscd, n_states = 7, state_names = list("G","A","V","L","I","F","Y")) | ||
hmm_combcd = fit_model(init_hmm_combcd) | ||
hidpaths_combcd = hidden_paths(hmm_combcd$model) | ||
|
||
jpeg("/Volumes/Nifty/QUT/HMM/hiddenstates/combcd.jpeg", 2000, 2000, quality = 100) | ||
ssplot(hmm_combcd$model, hidpaths_combcd,plots="hidden.paths", type = "I") | ||
dev.off() | ||
|
||
jpeg("/Volumes/Nifty/QUT/HMM/combcdobservations.jpeg", 2000, 2000, quality = 100) | ||
ssplot(hmm_combcd$model, hidpaths_combcd,plots="obs", type = "I", tlim= 1:397) | ||
dev.off() | ||
|
||
jpeg("/Volumes/Nifty/QUT/HMM/statetransitioncombcd.jpeg", 1000, 1000, quality = 100) | ||
plot(hmm_combcd$model,vertex.label='names',combine.slices = 0.05) | ||
dev.off() | ||
|
||
####### split hiddenstatepath data on seasons for Gympie | ||
#days since the start of the experiment that correspond with the start and ending date of the season | ||
winter = c(346:397,1:71) | ||
spring = 72:162 | ||
summer = 163:253 | ||
autumn = 254:345 | ||
|
||
#the hidden state sequences for each season in a stslist format | ||
winter_hidstateseqgympie = hidpaths_combcd[winter,][1:length(winter),] | ||
spring_hidstateseqgympie = hidpaths_combcd[spring,][1:length(spring),] | ||
summer_hidstateseqgympie = hidpaths_combcd[summer,][1:length(summer),] | ||
autumn_hidstateseqgympie = hidpaths_combcd[autumn,][1:length(autumn),] | ||
|
||
#creates a list of the hidden state sequences which can be used to write out a fasta file | ||
makeseqlist = function(season,numberdays){ | ||
seqlist = list() | ||
for (i in seq(1,numberdays,1)){ | ||
seqlist[[i]] = t(season)[,i] | ||
i = i+ 1 | ||
} | ||
return(seqlist) | ||
} | ||
|
||
#Create a list containing the hidden state sequences and write out a fasta file | ||
wintergympiecd = makeseqlist(winter_hidstateseqgympie, length(winter)) | ||
write.fasta(wintergympiecd, names = names(combcd)[winter], file.out = "/Volumes/Nifty/QUT/HMM/hiddenstates/fastafiles/wintergympiecd.txt") | ||
|
||
springgympiecd = makeseqlist(spring_hidstateseqgympie, length(spring)) | ||
write.fasta(springgympiecd, names = names(combcd)[spring], file.out = "/Volumes/Nifty/QUT/HMM/hiddenstates/fastafiles/springgympiecd.txt") | ||
|
||
summergympiecd = makeseqlist(summer_hidstateseqgympie, length(summer)) | ||
write.fasta(summergympiecd, names = names(combcd)[summer], file.out = "/Volumes/Nifty/QUT/HMM/hiddenstates/fastafiles/summergympiecd.txt") | ||
|
||
autumngympiecd = makeseqlist(autumn_hidstateseqgympie, length(autumn)) | ||
write.fasta(autumngympiecd, names = names(combcd)[autumn], file.out = "/Volumes/Nifty/QUT/HMM/hiddenstates/fastafiles/autumngympiecd.txt") | ||
|
||
test = readBStringSet("/Volumes/Nifty/QUT/HMM/hiddenstates/fastafiles/wintergympiecd.txt", format="fasta", | ||
nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE) | ||
letterFrequency(test, uniqueLetters(test)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#Calculate the centroids of the acoustic indices | ||
# woondum = excel file /Volumes/Nifty/QUT/Yvonne_AcousticData_WoondumAndGympie/Cluster50NormalisedIndexFeatures_Gympie2015June22.csv | ||
|
||
#create empty date frame where the centroids values will be stored | ||
centroids = data.frame(matrix(ncol = 12, nrow =50)) | ||
#For different subsets of data based on cluster_list the mean of every column will be calculated | ||
#The mean of every column for every subset is stored in centroids | ||
for (col in seq(3,ncol(woondum))){ | ||
centroids[,col-2] = aggregate(woondum[[col]] ~ cluster_list, mean, data=woondum)[2] | ||
} | ||
|
||
#Ignore the first two columns in woondum; cluster_list & minute_reference | ||
colnames(centroids) = colnames(woondum)[3:14] | ||
|
||
#create an empty data frame to store the medoids | ||
medoids = data.frame(matrix(ncol = 12, nrow =50)) | ||
|
||
#For every subset | ||
for (i in seq(1,50)){ | ||
#create a subset of the data based on cluster number | ||
subset = woondum[which(woondum$cluster_list==i),] | ||
smallest_distance = 1000 | ||
|
||
#Iterate through every row in the subset | ||
for (row in seq(1,nrow(subset))){ | ||
#Calculate the distance between the current row in the subset and the centroid of the given subset | ||
distance = (dist(rbind(subset[row,3:14],centroids[i,]))) | ||
print(distance) | ||
print(row) | ||
|
||
#Save the row that has the smallest distance as the medoid for that subset/cluster | ||
if (distance<smallest_distance){ | ||
smallest_distance = distance | ||
medoids[i,] = subset[row,3:14] | ||
} | ||
} | ||
} | ||
|
||
distmatrix = dist(medoids) | ||
hc = hclust(distmatrix) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/usr/bin/python | ||
from datetime import timedelta, date | ||
import datetime | ||
|
||
#creates a text file with the date and the number of days that went by since the start date of the experiment | ||
with open('/Volumes/Nifty/QUT/Scripts/datelist.txt', 'w') as file: | ||
d = date(2015,06,22) | ||
delta = datetime.timedelta(days=1) | ||
number = 1 | ||
while d <= date(2016,07,23): | ||
file.write(d.strftime("%Y-%m-%d") + ' ' + str(number) + '\n') | ||
d += delta | ||
number += 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
require(ggplot2) | ||
require(ggseqlogo) | ||
library('Biostrings') | ||
l | ||
subset = function(fasta, start, stop){ | ||
subsetseq = c() | ||
for (seq in fasta){ | ||
subsetseq = c(subsetseq, substr(seq,start,stop)) | ||
} | ||
return(subsetseq) | ||
} | ||
|
||
fasta = readBStringSet('/Volumes/Nifty/QUT/Scripts/Gympie_letters_midnight.txt', format="fasta", | ||
nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE) | ||
fasta1 = readAAStringSet('/Volumes/Nifty/QUT/Scripts/Gympie_letters_midnight.txt', format="fasta", | ||
nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE) | ||
fasta2 = read.fasta('/Volumes/Nifty/QUT/Fastafiles/Gympie_letters_midnight.txt',as.string='TRUE',seqonly='TRUE') | ||
|
||
ggplot() + geom_logo(fasta2,seq_type="other",col_scheme = 'taylor',namespace="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",font="akrobat_bold") + theme_logo() | ||
|
||
fastacd = read.fasta('/Volumes/Nifty/QUT/Scripts/Gympie_letters_civildawn.txt',as.string='TRUE',seqonly='TRUE') | ||
fastacdvec = read.fasta('/Volumes/Nifty/QUT/Fastafiles/Gympie_letters_civildawn.txt') | ||
fastacd2 = readBStringSet('/Volumes/Nifty/QUT/Fastafiles/Gympie_letters_civildawn.txt', format="fasta", | ||
nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE) | ||
|
||
ggplot() + geom_logo(fastacd700,method = 'probability',seq_type="other",col_scheme = 'taylor',namespace="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",font="akrobat_bold") + theme_logo() | ||
january = fastacd[1:31] | ||
january100 = subset(january,1,100) | ||
ggplot() + geom_logo(january,method = 'probability',seq_type="other",col_scheme = 'taylor',namespace="BCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",font="akrobat_bold") + theme_logo() | ||
|
||
#Calculate the frequency of each letter per sequence gympie | ||
freq = letterFrequency(fasta, letters = uniqueLetters(fasta)) | ||
freqtotal = sort(colSums(freq),decreasing=TRUE) | ||
freqtotalperc = (freqtotal*100)/sum(freqtotal) | ||
barplot(freqtotalperc,names.arg = names(freqtotal),las='2', col = colorpalette[[50]]) |
Oops, something went wrong.