Initial check in of R code

Additionally amended the .gitignore file for macintosh file systems
QutEcoacoustics · Oct 30, 2017 · 41d1ee4 · 41d1ee4
1 parent 38582d5
commit 41d1ee4
Show file tree

Hide file tree

Showing 6 changed files with 477 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -221,3 +221,32 @@ FakesAssemblies/
 
 ### Vagrant ###
 .vagrant/
+
+### MAC OS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
diff --git a/AudioAnalysis/RCode/Aniek/HMM.r b/AudioAnalysis/RCode/Aniek/HMM.r
@@ -0,0 +1,140 @@
+library('seqHMM')
+library('TraMineR')
+library('seqinr')
+########### gympie civil dawn
+#read in the sequence from the fasta file
+fastacd2gympie = readBStringSet('/Volumes/Nifty/QUT/Fastafiles/Gympie_letters_civildawn.txt', format="fasta",
+                                nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE)
+#Create sequences of equal length, therefore remove the last day because it is significantly shorter
+#And remove the last one/two minutes of each sequence so they are all equal
+fastacd2gympie = fastacd2gympie[1:397]
+gympiecd = subseq(fastacd2gympie, 1, 1438)
+
+#Create a state sequence object that can be used to build a HMM
+stateseqgympiecd = seqdef(gympiecd,id = names(gympiecd), missing = '-')
+
+#Initialize the HMM with 7 different states representing rain, birds, etc..
+init_hmm_gympiecd = build_hmm(observations=stateseqgympiecd, n_states=7)
+
+#Fit the model to the given observations/sequences
+hmm_gympiecd = fit_model(init_hmm_gympiecd)
+
+#Calculate the hidden state path sequence for each day
+hidpaths_gympiecd = hidden_paths(hmm_gympiecd$model)
+
+#Plot out the hidden state path sequences
+jpeg("/Volumes/Nifty/QUT/HMM/hiddenstates/gympiecd.jpeg", 2000, 2000, quality = 100)
+ssplot(hmm_gympiecd$model, hidpaths_gympiecd,plots="hidden.paths", type = "I")
+dev.off()
+
+#Show the state transition probabilities and emission probabilties in the form of a graph
+jpeg("/Volumes/Nifty/QUT/HMM/statetransitiongympiecd.jpeg", 1000, 1000, quality = 100)
+plot(hmm_gympiecd$model,vertex.label='names',combine.slices = 0.05, cpal = colorpalette[[50]])
+dev.off()
+
+########### woondum civil dawn
+fastacd2woondum = readBStringSet('/Volumes/Nifty/QUT/Fastafiles/Woondum_letters_civildawn.txt', format="fasta",
+                                nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE)
+fastacd2woondum = fastacd2woondum[1:397]
+woondumcd = subseq(fastacd2woondum, 1, 1438)
+stateseqwoondumcd = seqdef(woondumcd,id = names(woondumcd), missing = '-')
+init_hmm_woondumcd = build_hmm(observations=stateseqwoondumcd, n_states=7)
+hmm_woondumcd = fit_model(init_hmm_woondumcd)
+hidpaths_woondumcd = hidden_paths(hmm_woondumcd$model)
+
+jpeg("/Volumes/Nifty/QUT/HMM/hiddenstates/woondumcd.jpeg", 2000, 2000, quality = 100)
+ssplot(hmm_woondumcd$model, hidpaths_woondumcd,plots="hidden.paths", type = "I")
+dev.off()
+
+jpeg("/Volumes/Nifty/QUT/HMM/statetransitionwoondumcd.jpeg", 1000, 1000, quality = 100)
+plot(hmm_woondumcd$model,vertex.label='names',combine.slices = 0.05, cpal = colorpalette[[50]])
+dev.off()
+
+########### gympie midnight
+fastamn2gympie = readBStringSet('/Volumes/Nifty/QUT/Fastafiles/Gympie_letters_midnight.txt', format="fasta",
+                                nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE)
+
+########### woondum midnight
+fastamn2woondum = readBStringSet('/Volumes/Nifty/QUT/Fastafiles/Woondum_letters_midnight.txt', format="fasta",
+                                 nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE)
+stateseqwoondum = seqdef(fastamn2woondum,id = names(fastamn2woondum), missing = '-')
+init_hmm_woondummn = build_hmm(observations=stateseqwoondum, n_states=7)
+hmm_woondummn = fit_model(init_hmm_woondummn)
+hidpaths_woondummn = hidden_paths(hmm_woondummn$model)
+
+#two channels, one for each location civildawn
+stateseqs = list(stateseqgympiecd,stateseqwoondumcd)
+init_hmm_bothcd = build_hmm(observations=stateseqs, n_states = 7, channel_names = c("Gympie","Woondum"))
+hmm_bothcd = fit_model(init_hmm_bothcd)
+hidpaths_bothcd = hidden_paths(hmm_bothcd$model)
+
+jpeg("/Volumes/Nifty/QUT/HMM/hiddenstates/bothcd.jpeg", 2000, 2000, quality = 100)
+ssplot(hmm_bothcd$model, hidpaths_bothcd,plots="hidden.paths", type = "I")
+dev.off()
+
+jpeg("/Volumes/Nifty/QUT/HMM/bothcdobservationss.jpeg", 2000, 2000, quality = 100)
+ssplot(hmm_bothcd$model, hidpaths_bothcd,plots="obs", type = "I", cpal = colorpalette[[50]])
+dev.off()
+
+jpeg("/Volumes/Nifty/QUT/HMM/statetransitionbothcd.jpeg", 1000, 1000, quality = 100)
+plot(hmm_woondumcd$model,vertex.label='names',combine.slices = 0.05, cpal = colorpalette[[50]])
+dev.off()
+
+#datasets of gympie and woondum combined civil dawn
+combcd = append(gympiecd, woondumcd)
+stateseqscd = seqdef(combcd,id = names(combcd), missing = '-', cpal = colorpalette[[50]])
+init_hmm_combcd = build_hmm(observations=stateseqscd, n_states = 7, state_names = list("G","A","V","L","I","F","Y"))
+hmm_combcd = fit_model(init_hmm_combcd)
+hidpaths_combcd = hidden_paths(hmm_combcd$model)
+
+jpeg("/Volumes/Nifty/QUT/HMM/hiddenstates/combcd.jpeg", 2000, 2000, quality = 100)
+ssplot(hmm_combcd$model, hidpaths_combcd,plots="hidden.paths", type = "I")
+dev.off()
+
+jpeg("/Volumes/Nifty/QUT/HMM/combcdobservations.jpeg", 2000, 2000, quality = 100)
+ssplot(hmm_combcd$model, hidpaths_combcd,plots="obs", type = "I", tlim= 1:397)
+dev.off()
+
+jpeg("/Volumes/Nifty/QUT/HMM/statetransitioncombcd.jpeg", 1000, 1000, quality = 100)
+plot(hmm_combcd$model,vertex.label='names',combine.slices = 0.05)
+dev.off()
+
+####### split hiddenstatepath data on seasons for Gympie
+#days since the start of the experiment that correspond with the start and ending date of the season
+winter =  c(346:397,1:71)
+spring = 72:162
+summer = 163:253
+autumn = 254:345
+
+#the hidden state sequences for each season in a stslist format 
+winter_hidstateseqgympie = hidpaths_combcd[winter,][1:length(winter),]
+spring_hidstateseqgympie = hidpaths_combcd[spring,][1:length(spring),]
+summer_hidstateseqgympie = hidpaths_combcd[summer,][1:length(summer),]
+autumn_hidstateseqgympie = hidpaths_combcd[autumn,][1:length(autumn),]
+
+#creates a list of the hidden state sequences which can be used to write out a fasta file
+makeseqlist = function(season,numberdays){
+  seqlist = list()
+  for (i in seq(1,numberdays,1)){
+    seqlist[[i]] = t(season)[,i]
+    i = i+ 1
+  }
+return(seqlist)
+}
+
+#Create a list containing the hidden state sequences and write out a fasta file
+wintergympiecd = makeseqlist(winter_hidstateseqgympie, length(winter))
+write.fasta(wintergympiecd, names = names(combcd)[winter], file.out = "/Volumes/Nifty/QUT/HMM/hiddenstates/fastafiles/wintergympiecd.txt")
+
+springgympiecd = makeseqlist(spring_hidstateseqgympie, length(spring))
+write.fasta(springgympiecd, names = names(combcd)[spring], file.out = "/Volumes/Nifty/QUT/HMM/hiddenstates/fastafiles/springgympiecd.txt")
+
+summergympiecd = makeseqlist(summer_hidstateseqgympie, length(summer))
+write.fasta(summergympiecd, names = names(combcd)[summer], file.out = "/Volumes/Nifty/QUT/HMM/hiddenstates/fastafiles/summergympiecd.txt")
+
+autumngympiecd = makeseqlist(autumn_hidstateseqgympie, length(autumn))
+write.fasta(autumngympiecd, names = names(combcd)[autumn], file.out = "/Volumes/Nifty/QUT/HMM/hiddenstates/fastafiles/autumngympiecd.txt")
+
+test = readBStringSet("/Volumes/Nifty/QUT/HMM/hiddenstates/fastafiles/wintergympiecd.txt", format="fasta",
+                      nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE)
+letterFrequency(test, uniqueLetters(test))
diff --git a/AudioAnalysis/RCode/Aniek/createmedoids.r b/AudioAnalysis/RCode/Aniek/createmedoids.r
@@ -0,0 +1,40 @@
+#Calculate the centroids of the acoustic indices
+# woondum = excel file /Volumes/Nifty/QUT/Yvonne_AcousticData_WoondumAndGympie/Cluster50NormalisedIndexFeatures_Gympie2015June22.csv
+
+#create empty date frame where the centroids values will be stored
+centroids = data.frame(matrix(ncol = 12, nrow =50))
+#For different subsets of data based on cluster_list the mean of every column will be calculated
+#The mean of every column for every subset is stored in centroids
+for (col in seq(3,ncol(woondum))){
+  centroids[,col-2] = aggregate(woondum[[col]] ~ cluster_list, mean, data=woondum)[2]
+}
+
+#Ignore the first two columns in woondum; cluster_list & minute_reference
+colnames(centroids) = colnames(woondum)[3:14]
+
+#create an empty data frame to store the medoids
+medoids = data.frame(matrix(ncol = 12, nrow =50))
+
+#For every subset
+for (i in seq(1,50)){
+  #create a subset of the data based on cluster number
+  subset = woondum[which(woondum$cluster_list==i),]
+  smallest_distance = 1000
+
+  #Iterate through every row in the subset
+  for (row in seq(1,nrow(subset))){
+    #Calculate the distance between the current row in the subset and the centroid of the given subset
+    distance = (dist(rbind(subset[row,3:14],centroids[i,])))
+    print(distance)
+    print(row)
+
+    #Save the row that has the smallest distance as the medoid for that subset/cluster
+    if (distance<smallest_distance){
+      smallest_distance = distance
+      medoids[i,] = subset[row,3:14]
+    }
+  }
+}
+
+distmatrix = dist(medoids)
+hc = hclust(distmatrix)
diff --git a/AudioAnalysis/RCode/Aniek/datelist.py b/AudioAnalysis/RCode/Aniek/datelist.py
@@ -0,0 +1,13 @@
+#!/usr/bin/python
+from datetime import timedelta, date
+import datetime
+
+#creates a text file with the date and the number of days that went by since the start date of the experiment
+with open('/Volumes/Nifty/QUT/Scripts/datelist.txt', 'w') as file:
+	d = date(2015,06,22)
+	delta = datetime.timedelta(days=1)
+	number = 1
+	while d <= date(2016,07,23):
+	    file.write(d.strftime("%Y-%m-%d") + ' ' + str(number) + '\n')
+	    d += delta
+	    number += 1
diff --git a/AudioAnalysis/RCode/Aniek/seqlogo.r b/AudioAnalysis/RCode/Aniek/seqlogo.r
@@ -0,0 +1,35 @@
+require(ggplot2)
+require(ggseqlogo)
+library('Biostrings')
+l
+subset = function(fasta, start, stop){
+  subsetseq = c()
+  for (seq in fasta){
+    subsetseq = c(subsetseq, substr(seq,start,stop))
+  }
+  return(subsetseq)
+}
+
+fasta = readBStringSet('/Volumes/Nifty/QUT/Scripts/Gympie_letters_midnight.txt', format="fasta",
+               nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE)
+fasta1 = readAAStringSet('/Volumes/Nifty/QUT/Scripts/Gympie_letters_midnight.txt', format="fasta",
+                        nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE)
+fasta2 = read.fasta('/Volumes/Nifty/QUT/Fastafiles/Gympie_letters_midnight.txt',as.string='TRUE',seqonly='TRUE')
+
+ggplot() + geom_logo(fasta2,seq_type="other",col_scheme = 'taylor',namespace="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",font="akrobat_bold") + theme_logo()
+
+fastacd = read.fasta('/Volumes/Nifty/QUT/Scripts/Gympie_letters_civildawn.txt',as.string='TRUE',seqonly='TRUE')
+fastacdvec = read.fasta('/Volumes/Nifty/QUT/Fastafiles/Gympie_letters_civildawn.txt')
+fastacd2 = readBStringSet('/Volumes/Nifty/QUT/Fastafiles/Gympie_letters_civildawn.txt', format="fasta",
+                          nrec=-1L, skip=0L, seek.first.rec=FALSE, use.names=TRUE)
+
+ggplot() + geom_logo(fastacd700,method = 'probability',seq_type="other",col_scheme = 'taylor',namespace="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",font="akrobat_bold") + theme_logo()
+january = fastacd[1:31]
+january100 = subset(january,1,100)
+ggplot() + geom_logo(january,method = 'probability',seq_type="other",col_scheme = 'taylor',namespace="BCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",font="akrobat_bold") + theme_logo()
+
+#Calculate the frequency of each letter per sequence gympie
+freq = letterFrequency(fasta, letters = uniqueLetters(fasta))
+freqtotal = sort(colSums(freq),decreasing=TRUE)
+freqtotalperc = (freqtotal*100)/sum(freqtotal)
+barplot(freqtotalperc,names.arg = names(freqtotal),las='2', col = colorpalette[[50]])