Skip to content

Commit

Permalink
Example 1 running.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbkalmbach committed Feb 1, 2018
1 parent e1bd794 commit 20ea1d0
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 17 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
# Emacs backups
*.*~
*.pyc
examples/.ipynb_checkpoints
Binary file added data/Inst.10E10.1Z.spec.gz
Binary file not shown.
Binary file added data/Inst.64E08.1Z.spec.gz
Binary file not shown.
105 changes: 96 additions & 9 deletions siggi/calcIG.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from __future__ import division

import numpy as np
from . import Sed
from sklearn.neighbors.kde import KernelDensity
from . import Sed, Bandpass
__all__ = ["calcIG"]

class calcIG(object):
Expand All @@ -19,30 +22,114 @@ def __init__(self, filter_dict, sed_list):
flambda=sed_obj.flambda)
sed_copy.resampleSED(wavelen_match=filter_dict.values()[0].wavelen)
sed_copy.flambda[np.where(np.isnan(sed_copy.flambda))] = 0.
f_norm = sed_copy.calcFluxNorm(-10.0, filter_dict.values()[0])
imsimBand = Bandpass()
imsimBand.imsimBandpass()
f_norm = sed_copy.calcFluxNorm(-10.0, imsimBand)
sed_copy.multiplyFluxNorm(f_norm)
self._sed_list.append(sed_copy)
self._flat_sed = Sed()
self._flat_sed.setFlatSED(wavelen_min=99., wavelen_max=1500.)
f_norm = self._flat_sed.calcFluxNorm(-15.0, imsimBand)
self._flat_sed.multiplyFluxNorm(f_norm)
self._flat_sed.resampleSED(wavelen_match=self._sed_list[0].wavelen)

return

def draw_colors(self, num_points):
def draw_colors(self, num_points, random_seed=17):

np.random.seed(random_seed)

mags_list = []
color_list = []
true_sed_list = []

sed_on = 0
sed_copy_1 = Sed()
for sed_obj in self._sed_list:
flux_list = self._filter_dict.fluxListForSed(sed_obj)
#flux_list = self._filter_dict.fluxListForSed(sed_obj)
sed_copy_1.setSED(wavelen=sed_obj.wavelen, flambda=sed_obj.flambda)
for i in range(num_points):
flux_with_errors = flux_list + np.random.normal(loc=0.0,
scale=np.sqrt(flux_list)*1e1,
size=2)
mags = [sed_obj.magFromFlux(f) for f in flux_with_errors]
### TODO: Add realistic errors
err_val = np.random.normal(loc=0.0, scale=np.sqrt(sed_obj.flambda))
#print(err_val)
sed_copy_1.flambda = sed_obj.flambda + .4*err_val
sed_copy_1.flambda += self._flat_sed.flambda
sed_copy_1.flambda[sed_copy_1.flambda <= 0.] = 0.
#print(sed_copy_1.flambda[400])
#flux_with_errors = flux_list +
flux_with_errors = self._filter_dict.fluxListForSed(sed_copy_1)
#flux_with_errors = np.random.normal(loc=flux_list,
# scale=np.sqrt(flux_list)*1000.,
# size=len(flux_list))
#flux_with_errors = np.array(flux_with_errors)
#flux_with_errors[np.where(flux_with_errors <= 0.)] = 0.
mags = [sed_copy_1.magFromFlux(f) for f in flux_with_errors]
mags_list.append(mags)
color_list.append([mags[i] - mags[i-1]
for i in range(len(mags)-1)])
true_sed_list.append(sed_on)
sed_on += 1

return mags_list, color_list, true_sed_list
return np.array(mags_list), np.array(color_list), np.array(true_sed_list)

def calc_density(self, colors, truth_vals, bandwidth=None):

if bandwidth is None:
bandwidth = 0.05# np.ptp(colors)/20.

kde_total = KernelDensity(kernel='gaussian',
bandwidth=bandwidth)
kde_total.fit(colors)

kde_list = []

for y_i in np.unique(truth_vals):
kde_1 = KernelDensity(kernel='gaussian',
bandwidth=bandwidth)
kde_1.fit(colors[np.where(truth_vals == y_i)])
kde_list.append(kde_1)

kde_lists = [kde_total, kde_list]

return kde_lists

def calc_h(self, truth_vals):

h_sum = 0
total_y = len(truth_vals)

for y_i in np.unique(truth_vals):
size_y_i = len(np.where(truth_vals == y_i)[0])
h_sum += -(size_y_i/total_y)*np.log2(size_y_i/total_y)

return h_sum

def calc_hyx(self, kde_lists):

hyx_sum = 0
total_kde, y_kde_list = kde_lists
num_y = len(y_kde_list)
dens_step = 0.01
dens_range = np.arange(-2., 2., dens_step).reshape(-1,1)
total_dens = np.exp(total_kde.score_samples(dens_range))

for idx in range(num_y):
y_dens = np.exp(y_kde_list[idx].score_samples(dens_range))
for x_val, yx_val in zip(total_dens, y_dens):
p_x_y = yx_val*dens_step/num_y
p_x = x_val*dens_step
if p_x_y > 0.:
hyx_sum += p_x_y*np.log2(p_x_y/p_x)

return -1.*hyx_sum

def calc_IG(self):

mags, colors, truth = self.draw_colors(50*len(self._sed_list))
k = self.calc_density(colors, truth)
hy_sum = self.calc_h(truth)
hyx_sum = self.calc_hyx(k)

info_gain = hy_sum - hyx_sum

return info_gain
19 changes: 11 additions & 8 deletions siggi/spectra.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,27 @@ class spectra(object):

def __init__(self):

data = sdss_corrected_spectra.fetch_sdss_corrected_spectra()
self.spectra = sdss_corrected_spectra.reconstruct_spectra(data)
self.wavelen = sdss_corrected_spectra.compute_wavelengths(data)

return

def get_red_spectrum(self):

spec = self.spectra[3100]/np.max(self.spectra[3100])
sed_obj = Sed()
sed_obj.setSED(self.wavelen/10., spec)
sed_obj.readSED_flambda('../data/Inst.10E10.1Z.spec.gz')

return sed_obj

def get_blue_spectrum(self):

spec = self.spectra[684]/np.max(self.spectra[684])
sed_obj = Sed()
sed_obj.setSED(self.wavelen/10., spec)
sed_obj.readSED_flambda('../data/Inst.64E08.1Z.spec.gz')

return sed_obj

def get_sigmoid_spectrum(self, lam_0=364.6):

wavelen = np.arange(99., 2400.05, 0.1)
spec = 1 / np.exp(wavelen - lam_0)
sed_obj = Sed()
sed_obj.setSED(wavelen=wavelen, flambda=spec)

return sed_obj

0 comments on commit 20ea1d0

Please sign in to comment.