Calc continuum using AFS

eford · eford · commit 7b076a932aeb · 2021-04-03T12:25:32.000-04:00
diff --git a/Project.toml b/Project.toml
@@ -5,8 +5,10 @@ version = "0.0.2"
 
 [deps]
 AstroLib = "c7932e45-9af1-51e7-9da9-f004cd3a462b"
+Atom = "c52e3926-4ff0-5f6e-af25-54175e0327b1"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
+DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 EchelleCCFs = "5d9f57b1-d7b5-417c-8d79-eedbcaad0187"
@@ -15,15 +17,19 @@ FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 GPLinearODEMaker = "27ef9b34-1325-4cec-ba33-00f2f4637873"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+Juno = "e5e0dc1b-0480-54bc-9374-aad01c23163d"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
 Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
 MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
+NaNMath = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3"
 Optim = "429524aa-4258-5aef-a3af-852621145aeb"
 PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
 Query = "1a8c2f83-1ff3-5112-b086-8aa67b057ba1"
+RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
+Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
 RvSpectML = "1f61ac2c-3a1c-440a-925a-2707197041c8"
 RvSpectMLBase = "c48404b2-35ea-40e7-ac7f-06a53de703d6"
 RvSpectMLPlots = "6ad363e8-653f-4efd-a04b-f033e69a984c"
diff --git a/analyze_daily_aves.jl b/analyze_daily_aves.jl
@@ -0,0 +1,74 @@
+if occursin(r"RvSpectMLEcoSystem$", pwd())   cd("NeidSolarScripts")   end  # when started from the ecosystem root, move into the scripts directory
+ using Pkg
+ Pkg.activate(".")  # activate the project found in the current directory
+ using HDF5, JLD2, FITSIO, FileIO, DataFrames, Query, RvSpectMLBase, Statistics, MultivariateStats, Plots
+
+if occursin(r"NeidSolarScripts$", pwd())   cd("output56")   end  # the rest of the script works inside output56
+files = DataFrame(:filename=>readdir())  # one row per entry of the current directory
+jld_files = files |> @filter(contains(_.filename,"new.jld2")) |> DataFrame  # keep only names containing "new.jld2"
+
+# Preview: PCA of the "ccfs_espresso" array stored in the first listed file.
+# Only the first file is used (nothing is drawn when the listing is empty).
+plt = plot()
+ for day in Iterators.take(eachrow(jld_files), 1)
+    # Read through the function form of h5open so the handle is closed even if
+    # the read throws; the previous code left the file open.
+    ccfs = h5open(f->read(f,"ccfs_espresso"), day.filename)
+    # Trim 400/401 rows and 59/60 columns from the edges before fitting.
+    M = fit(PCA,ccfs[401:end-401,60:end-60],maxoutdim=10,pratio=1.0)
+    plot!(plt,M.mean./maximum(M.mean), label="Mean")
+    # First four components: sign fixed by the entry in row 200, amplitude
+    # scaled by the largest absolute value so all curves share one axis.
+    for i in 1:4
+        plot!(plt,sign(M.proj[200,i]).*M.proj[:,i]./maximum(abs.(extrema(M.proj[:,i]))), label=string(i))
+    end
+ end
+ display(plt)
+ #savefig("ccf_pca_20120104.png")
+
+# Velocity grid used for every plot below.  The copy stored in the files used
+# to be read and then immediately overwritten by this hard-coded grid (leaking
+# an open file handle on the way), so only the hard-coded grid is kept.
+v_grid = 100.0 * (-616:616)
+jld_files.mean_ccf = Vector{Vector{Float64}}(undef,size(jld_files,1))
+jld_files.smooth_ccf = Vector{Vector{Float64}}(undef,size(jld_files,1))
+jld_files.ccf_resid = Vector{Array{Float64,2}}(undef,size(jld_files,1))
+# Fill all three columns from each daily file.  mean_ccf and smooth_ccf must be
+# populated here: they are allocated with `undef`, and the hcat reductions
+# below throw UndefRefError if any entry is left unassigned.
+for (i,day) in enumerate(eachrow(jld_files))
+     # do-block form guarantees the file is closed after the reads.
+     h5open(day.filename) do f
+         jld_files.mean_ccf[i] = read(f,"mean_ccf")
+         jld_files.smooth_ccf[i] = read(f,"ccf_template_smooth")
+         jld_files.ccf_resid[i] = read(f,"ccf_resid_minus_rv_proj")
+     end
+  end
+# One column per file.
+mean_ccf_matrix = reduce(hcat,jld_files.mean_ccf)
+smooth_ccf_matrix = reduce(hcat,jld_files.smooth_ccf)
+
+norm_mean = vec((mean(mean_ccf_matrix[1:300,:],dims=1).+mean(mean_ccf_matrix[end-300:end,:],dims=1))/2)  # per-column average of the first 300 and last 301 rows; overwritten two lines below
+norm_smooth = vec((mean(smooth_ccf_matrix[1:300,:],dims=1).+mean(smooth_ccf_matrix[end-300:end,:],dims=1))/2)  # same for the smoothed CCFs; also overwritten below
+norm_mean = vec((mean(mean_ccf_matrix[600:632,:],dims=1))/2)  # value actually used: half the per-column mean of rows 600:632
+norm_smooth = vec((mean(smooth_ccf_matrix[600:632,:],dims=1))/2)  # same, for the smoothed CCFs
+
+plot(v_grid,mean_ccf_matrix./norm_mean'.-smooth_ccf_matrix./norm_smooth')  # difference after normalizing each matrix by its own per-column factor
+plot(v_grid,(mean_ccf_matrix.-smooth_ccf_matrix)./norm_smooth')  # raw difference divided by the smooth normalization
+xlims!(-12e3,12e3)
+
+plot(v_grid,(mean_ccf_matrix-smooth_ccf_matrix)[:,1])  # difference for the first column (first file) only
+
+cols_use = 301:(size(mean_ccf_matrix,1)-301)  # despite the name this indexes the first dimension: drops the first 300 and last 301 rows
+M = fit(PCA,mean_ccf_matrix[cols_use,:],maxoutdim=10,pratio=1.0)  # PCA on the trimmed matrix, at most 10 components
+principalvars(M)
+scatter(log10.(principalvars(M)./tprincipalvar(M)));  # log10 of each principal variance relative to tprincipalvar(M)
+ xlabel!("Number of PCs");
+ ylabel!("log frac variance remaining")
+
+plot(v_grid[cols_use,:],mean(mean_ccf_matrix[cols_use,:],dims=2),label="Mean");  # row-wise mean across files
+ plot!(v_grid[cols_use,:],M.proj[:,1].+0.5,label="1");  # the constant added to each component only shifts its curve vertically
+ plot!(v_grid[cols_use,:],M.proj[:,2].+0.4,label="2");
+ plot!(v_grid[cols_use,:],M.proj[:,3].+0.25,label="3");
+ plot!(v_grid[cols_use,:],M.proj[:,4].+0.15,label="4");
+ plot!(v_grid[cols_use,:],M.proj[:,5].+0.,label="5");
+ xlims!(-1e4,1e4)
+#savefig("daily_ccf_PCA.png")
+
+f8 = h5open(jld_files.filename[8])  # eighth listed file; NOTE(review): this handle is never closed in the script
+ccfs =read(f8,"ccfs_espresso")
+
+plot(v_grid,ccfs[:,100:120].-mean(ccfs[:,100:120],dims=2))  # columns 100:120 minus their row-wise mean
+
+plot(v_grid,ccfs[:,100:120].-mean(ccfs[:,100:120],dims=2))  # same plot again, this time zoomed by the xlims! below
+xlims!(-1.2e3,1.2e3)
diff --git a/examples/calc_continuum.jl b/examples/calc_continuum.jl
@@ -0,0 +1,137 @@
+if occursin(r"RvSpectMLEcoSystem$", pwd())  # started from the ecosystem root: move into the scripts repo and activate it
+    cd("NeidSolarScripts")
+    using Pkg
+    Pkg.activate(".")
+ end
+
+verbose = true  # gates the progress messages printed below
+ make_plots = true  # NOTE(review): not referenced anywhere else in the visible part of this script
+ if verbose && !isdefined(Main,:RvSpectML)  println("# Loading RvSpecML")    end  # printed only while RvSpectML is not yet defined in Main
+ using RvSpectMLBase
+ using EchelleInstruments, EchelleInstruments.NEID
+ using EchelleCCFs
+ #=
+ using RvSpectML
+ using NeidSolarScripts
+ using NeidSolarScripts.SolarRotation
+ if verbose   println("# Loading other packages")    end
+ =#
+ using CSV, DataFrames, Query, StatsBase, Statistics, Dates
+ using JLD2, FileIO
+ using NaNMath
+
+target_subdir = "good_days/DRPv0.7"   # USER: Replace with directory of your choice
+  fits_target_str = "Sun"
+  output_dir = "output/continuum"  # directory that receives the per-observation .jld2 files written at the end of this script
+  #outputs = Dict{String,Any}()
+  paths_to_search_for_param = [pwd(),"examples",joinpath(pkgdir(RvSpectMLBase),"..","RvSpectML","examples"), "/gpfs/scratch/jpn23/"]  # passed to read_data_paths and code_to_include_param_jl below
+  # NOTE: make_manifest does not update its paths_to_search when default_paths_to_search is defined here, so if you change the line above, you must also include "paths_to_search=default_paths_to_search" in the make_manifest() function call below
+  pipeline_plan = PipelinePlan()
+  dont_make_plot!(pipeline_plan, :movie)
+
+reset_all_needs!(pipeline_plan)
+#if need_to(pipeline_plan,:read_spectra)
+if verbose println("# Finding what data files are avaliable.")  end
+if isfile("manifest.csv")  # reuse a cached manifest from the current directory when one exists
+    df_files  = CSV.read("manifest.csv", DataFrame)
+    @assert size(df_files,1) >= 1  # the cached manifest must be non-empty and carry the columns checked below
+    @assert hasproperty(df_files,:Filename)
+    @assert hasproperty(df_files,:target)
+    @assert hasproperty(df_files,:bjd)
+    @assert hasproperty(df_files,:ssbz)
+    @assert hasproperty(df_files,:exptime)
+else
+    eval(read_data_paths(paths_to_search=paths_to_search_for_param))  # evaluates generated code; the assert below expects it to define neid_data_path in Main
+    @assert isdefined(Main,:neid_data_path)
+    df_files = make_manifest(neid_data_path, target_subdir, NEID )
+    CSV.write("manifest.csv", df_files)  # cache the manifest for later runs
+end
+
+# Announce, then pull in the user's param.jl settings.
+verbose && println("# Reading in customized parameters from param.jl.")
+   # Fall back to the first day unless a day index was already chosen.
+   @isdefined(idx_day_to_use) || (idx_day_to_use = 1)
+   eval(code_to_include_param_jl(paths_to_search=paths_to_search_for_param))
+   # Date label for this run: the first capture group of the first and last
+   # file names, collapsed to a single value when both agree and joined with
+   # "-" otherwise.  df_files_use is expected to exist after the eval above.
+   date_str = let fits_date = name -> match(r"neidL1_(\d+)[T_](\d+)\.fits$", name)[1]
+      date_lo = fits_date(first(df_files_use.Filename))
+      date_hi = fits_date(last(df_files_use.Filename))
+      date_lo == date_hi ? date_lo : string(date_lo) * "-" * string(date_hi)
+   end
+   #=
+   outputs["df_files_use"] = df_files_use
+
+   outputs_filename = joinpath(output_dir,"solar_" * date_str * "_new.jld2")
+   if isfile(outputs_filename) && false
+     times_already_processed = load(outputs_filename, "times")
+     files_in_day_to_process = size(df_files_solar_by_day.data[idx_day_to_use],1)
+      if files_in_day_to_process == length(times_already_processed)
+         println("# Already processed all ", length(times_already_processed), " files for ", date_str)
+         exit()
+      end
+   end
+   =#
+
+using Distributed
+# Start the worker pool only when none exists yet, so that re-including this
+# script in the same session does not add four more workers every time.
+if nprocs() == 1
+    addprocs(4)
+end
+
+# Activate the project on every process *before* loading packages: a freshly
+# started worker is not guaranteed to be using this project's environment, and
+# RCall is a dependency declared in this project's Project.toml.
+@everywhere using Pkg
+@everywhere Pkg.activate(".")
+@everywhere using RCall
+# Load the AFS implementation into the R session of every process.
+@everywhere afs_src = joinpath(pwd(),"src","AFS.R")
+@everywhere R"source($afs_src)"
+@everywhere using RvSpectMLBase
+@everywhere using EchelleInstruments
+
+# calc_continuum_model(spectrum; order_idx)
+#
+# Fit a continuum to one order of `spectrum` by calling the R function `AFS`
+# (sourced from src/AFS.R on every process).
+#
+# Arguments
+#   spectrum  : spectrum providing `λ` and `flux`, indexed as [pixel, order]
+#   order_idx : index of the order to fit
+#
+# Returns a vector with one entry per pixel row of `spectrum.flux`: the AFS
+# result at the pixels that were fitted and NaN everywhere else.
+#
+# The trailing 0.95 and 0.25 are passed positionally to `AFS`; their meaning is
+# defined in src/AFS.R, which is not part of this file.
+@everywhere function calc_continuum_model(spectrum::AbstractSpectra2D; order_idx::Integer )
+    inst = get_inst(spectrum)
+    possible_pix = get_pixel_range(inst,order_idx)
+    bad_pix = bad_col_ranges(inst,order_idx)
+    # Fit only pixels inside the order's range that are not in a bad-column range.
+    pix_rng = EchelleInstruments.calc_complement_index_ranges(possible_pix,bad_pix)
+    pix = mapreduce(collect,vcat,pix_rng)
+    # Two-column Float64 matrix handed to R: wavelength, then flux.
+    afs_inputs = zeros(Float64,length(pix),2)
+    afs_inputs[:,1] .= view(spectrum.λ,pix,order_idx)
+    afs_inputs[:,2] .= view(spectrum.flux,pix,order_idx)
+    @assert !any(isnan,afs_inputs)
+    afs_output_R = R"AFS($afs_inputs,0.95,0.25)"
+    afs_output = rcopy(afs_output_R)
+    # Pixels that were not fitted stay NaN.
+    continuum = fill(NaN,size(spectrum.flux,1))
+    continuum[pix] .= afs_output
+    return continuum
+end
+
+
+
+@everywhere using EchelleInstruments.NEID
+# Continuum for every order of `spectrum`: the per-order fits are farmed out
+# with pmap and assembled into a matrix the same size as `spectrum.flux`.
+# Columns outside min_order:max_order of the instrument remain NaN.
+@everywhere function calc_continuum_model(spectrum::AbstractSpectra2D )
+    inst = get_inst(spectrum)
+    orders = min_order(inst):max_order(inst)
+    per_order = pmap(orders) do ord
+        calc_continuum_model(spectrum,order_idx=ord)
+    end
+    output = fill(NaN, size(spectrum.flux))
+    for (ord,order_continuum) in zip(orders,per_order)
+        output[:,ord] .= order_continuum
+    end
+    return output
+end
+
+# Compute and save a continuum model for every observation of every day.
+# Each observation is written to <output_dir>/neidL1_<date>T<time>.jld2 under
+# the key "continuum".
+num_days_to_process = size(df_files_solar_by_day,1)
+# The save below needs the target directory to exist; create it up front.
+mkpath(output_dir)
+for idx_day_to_use in 1:num_days_to_process
+  # Process the files of a day in time order.
+  df_files_use = df_files_solar_by_day[idx_day_to_use,:data] |> @orderby(_.bjd) |> DataFrame
+
+  println("# *** Working on day ", idx_day_to_use, " with ", size(df_files_use,1), " files. ***" )
+  for row in eachrow(df_files_use)
+    # Validate the name before the expensive read, with a readable error
+    # instead of a failure on `nothing.captures`.
+    m = match(r"neidL1_(\d+)[T_](\d+)\.fits$",row.Filename)
+    m === nothing && error("Filename does not match neidL1_<date>[T_]<time>.fits: ", row.Filename)
+    spec = NEID.read_data(row)
+    # Output names always use "T" between date and time, whatever the input used.
+    output_filename = joinpath(output_dir, "neidL1_" * m.captures[1] * "T" * m.captures[2] * ".jld2")
+    println("# Working on ", output_filename)
+    continuum = calc_continuum_model(spec)
+    @save output_filename continuum
+  end
+
+end
diff --git a/examples/plot_continuum.jl b/examples/plot_continuum.jl
@@ -0,0 +1,116 @@
+if occursin(r"RvSpectMLEcoSystem$", pwd())  # started from the ecosystem root: move into the scripts repo
+    cd("NeidSolarScripts")
+    using Pkg
+    Pkg.activate(".")
+elseif occursin(r"NeidSolarScripts$", pwd())  # already in the scripts repo: just activate it
+   using Pkg
+   Pkg.activate(".")
+ elseif occursin(r"examples$", pwd())  # started from an examples directory: go up one level first
+    cd("..")
+    using Pkg
+    Pkg.activate(".")
+ end
+
+using JLD2, FileIO
+using CSV, DataFrames, Query
+#using StatsBase, Statistics
+using Dates, NaNMath
+using RvSpectMLBase
+using EchelleInstruments
+
+using Plots
+
+target_subdir = "good_days/DRPv0.7"   # USER: Replace with directory of your choice
+  fits_target_str = "Sun"
+  output_dir = "output/continuum"  # directory the saved continuum .jld2 files are read from further down
+  #outputs = Dict{String,Any}()
+  paths_to_search_for_param = [pwd(),"examples",joinpath(pkgdir(RvSpectMLBase),"..","RvSpectML","examples"), "/gpfs/scratch/jpn23/"]  # passed to read_data_paths and code_to_include_param_jl below
+  # NOTE: make_manifest does not update its paths_to_search when default_paths_to_search is defined here, so if you change the line above, you must also include "paths_to_search=default_paths_to_search" in the make_manifest() function call below
+  pipeline_plan = PipelinePlan()
+  dont_make_plot!(pipeline_plan, :movie)
+
+verbose = false  # progress messages are switched off in this script
+reset_all_needs!(pipeline_plan)
+#if need_to(pipeline_plan,:read_spectra)
+if verbose println("# Finding what data files are avaliable.")  end
+if isfile("manifest.csv")  # reuse a cached manifest from the current directory when one exists
+    df_files  = CSV.read("manifest.csv", DataFrame)
+    @assert size(df_files,1) >= 1  # the cached manifest must be non-empty and carry the columns checked below
+    @assert hasproperty(df_files,:Filename)
+    @assert hasproperty(df_files,:target)
+    @assert hasproperty(df_files,:bjd)
+    @assert hasproperty(df_files,:ssbz)
+    @assert hasproperty(df_files,:exptime)
+else
+    eval(read_data_paths(paths_to_search=paths_to_search_for_param))  # evaluates generated code; the assert below expects it to define neid_data_path in Main
+    @assert isdefined(Main,:neid_data_path)
+    df_files = make_manifest(neid_data_path, target_subdir, NEID )
+    CSV.write("manifest.csv", df_files)  # cache the manifest for later runs
+end
+
+idx_day_to_use = 1  # always set here, so the @isdefined fallback just below never fires
+if verbose println("# Reading in customized parameters from param.jl.")  end
+   if !@isdefined(idx_day_to_use)
+       idx_day_to_use = 1
+   end
+   eval(code_to_include_param_jl(paths_to_search=paths_to_search_for_param))  # NOTE(review): later code uses df_files_use, presumably defined by this evaluated code; confirm
+   #=
+   if match(r"neidL1_(\d+)[T_](\d+)\.fits$", first(df_files_use.Filename))[1] ==  match(r"neidL1_(\d+)[T_](\d+)\.fits$", last(df_files_use.Filename))[1]
+      match(r"neidL1_(\d+)[T_](\d+)\.fits$", first(df_files_use.Filename))[1]date_str = match(r"neidL1_(\d+)[T_](\d+)\.fits$", first(df_files_use.Filename))[1]
+    else
+      date_str = string(match(r"neidL1_(\d+)[T_](\d+)\.fits$", first(df_files_use.Filename))[1]) * "-" * string(match(r"neidL1_(\d+)[T_](\d+)\.fits$", last(df_files_use.Filename))[1])
+   end
+   outputs["df_files_use"] = df_files_use
+
+   outputs_filename = joinpath(output_dir,"solar_" * date_str * "_new.jld2")
+   if isfile(outputs_filename) && false
+     times_already_processed = load(outputs_filename, "times")
+     files_in_day_to_process = size(df_files_solar_by_day.data[idx_day_to_use],1)
+      if files_in_day_to_process == length(times_already_processed)
+         println("# Already processed all ", length(times_already_processed), " files for ", date_str)
+         exit()
+      end
+   end
+   =#
+
+# Load the saved continuum model of every observation in df_files_use.
+continua = Vector{Array{Float32,2}}()
+ for row in eachrow(df_files_use)
+    m = match(r"neidL1_(\d+)[T_](\d+)\.fits$", row.Filename)
+    # examples/calc_continuum.jl always writes neidL1_<date>T<time>.jld2, even
+    # when the FITS name uses "_" between date and time.  Build the name the
+    # same way here; reusing the original separator would miss those files.
+    continuum_filename = joinpath(output_dir, "neidL1_" * m.captures[1] * "T" * m.captures[2] * ".jld2")
+    jldopen(continuum_filename,"r") do file
+        push!(continua,file["continuum"])
+    end
+ end
+
+using Plots
+using StatsBase
+
+# Order to inspect and the pixel range of that order.
+ord = 90
+ pix = get_pixel_range(NEID2D(),ord)
+ # Raw continua for a few sample observations.  The sample list is clipped to
+ # the observations actually loaded, so short data sets no longer hit a
+ # BoundsError at index 100 and above.
+ plt = plot()
+ for obs in intersect(vcat(100:105,150:155,200:205), eachindex(continua))
+    plot!(continua[obs][pix,ord], label=string(obs) )
+ end
+ display(plt)
+
+ # Pixel-wise mean continuum over all observations.
+ nobs = size(df_files_use,1)
+ mean_continuum = mapreduce(obs->continua[obs][pix,ord],.+,1:nobs) ./ nobs
+
+ # Per-observation scale factor: mean ratio to the mean continuum, ignoring the
+ # first 999 and last 1000 pixels of the range.
+ plt = plot()#legend=:none)
+ mean_mean_continuum = Float64[NaNMath.mean((continua[obs][pix,ord]./mean_continuum)[1000:end-1000]) for obs in 1:nobs]
+ scatter!(plt, 1:nobs,mean_mean_continuum)
+ #display(plt)
+
+ # Every continuum divided by the mean continuum and its own scale factor.
+ # `pix` is the value computed above; it does not change per observation.
+ plt = plot(legend=:none)
+  for obs in 1:nobs# vcat(100:105,150:155,200:205)
+     plot!(continua[obs][pix,ord]./(mean_continuum.*mean_mean_continuum[obs]), label=string(obs) )
+  end
+  display(plt)
+
+# Two alternative zoom levels; the last call is the one left in effect.
+ylims!(plt,0.99,1.01)
+ylims!(plt,0.9,1.1)