WeatherNews.jl/src/db.jl

using SQLite
using DataFrames
using Dates
using TimeZones
using WeatherNews.Helpers

"""
    sql_fn(db)

Return a function that takes an SQL and optional bind parameters
and runs the query and returns the results as a DataFrame.

```julia
db = SQLite.DB("sql/wn.db")
sql = WeatherNews.DB.sql_fn(db)
res = sql("SELECT * FROM cancellation")
```
"""
function sql_fn(db)
    function sql(query, params=[])
        DBInterface.execute(db, query, params) |> DataFrame
    end
end

"""
Find or create a caster by short name `n`.
"""
function find_or_create_caster(db, n)
    sql_select = "SELECT * FROM caster WHERE n = ? LIMIT 1"
    df = DBInterface.execute(db, sql_select, [n]) |> DataFrame
    if df.id != Union{Missing,Int64}[]
        return df
    else
        DBInterface.execute(db, "INSERT INTO caster (n) VALUES (?)", [n])
        df = DBInterface.execute(db, sql_select, [n]) |> DataFrame
        return df
    end
end

"""
Find all casters
"""
function find_all_casters(db)
    sql_select = "SELECT * FROM CASTER"
    return DBInterface.execute(db, sql_select) |> DataFrame
end

function find_recent_WNL_schedule(db)
    # will return most recent 48 hours of schedule entries at least, including upcoming and past days'
    # limit to 28 assumes worst case scenario that all recent segments are tag-teamed
    sql_select = "SELECT id, jst FROM schedule WHERE segment_id IS NOT 8 ORDER BY jst DESC limit 28;"
    s = DBInterface.execute(db, sql_select) |> DataFrame
    s.jst = jst2zdt.(s.jst)
    s
end

"""
Find a segment by short name `n`.
"""
function find_segment(db, n)
    return DBInterface.execute(db, "SELECT * FROM segment WHERE n = ? LIMIT 1", [n]) |> DataFrame
end

"""
Insert a new `row` into schedule if possible.

If a row with the same `jst` value already exists, it will silently fail on purpose.
"""
function insert_schedule(db, row)
    # find caster
    caster_id = if row[:caster] == ""
        missing
    else
        caster = find_or_create_caster(db, row[:caster])
        caster.id[1]
    end
    # find segment
    segment = find_segment(db, row[:title])
    # insert row
    try
        sql_insert = "INSERT INTO schedule (caster_id, segment_id, jst) VALUES (?, ?, ?)"
        timestamp = Dates.format(row[:t], "Y-mm-ddTHH:MM:SSz")
        #@info "before insert" caster_id, segment.id[1] timestamp
        ri = DBInterface.execute(db, sql_insert, [caster_id, segment.id[1], timestamp])
        return ri
    catch e
        @debug e
        return false
    end
end

"""
Return a row from the schedule based on its `jst` time.
"""
function find_schedule_by_jst(db, jst::String)
    df = DBInterface.execute(db,
                             "SELECT * FROM schedule WHERE jst = ? AND segment_id IS NOT 8 LIMIT 1",
                             [jst]) |> DataFrame
    if size(df, 1) == 0
        return missing
    else
        return eachrow(df)[1]
    end
end

"Update a schedule row with its `video_id` and `view_count`."
function update_schedule_with_video(db, video_id::String, view_count::Union{Int,Missing}, id::Int)
    try
        sql_update = "UPDATE schedule SET video_id = ?, view_count = ? WHERE id = ?;"
        DBInterface.execute(db, sql_update, [video_id, view_count, id])
    catch e
        @debug e
        return e
    end
end

"Update a schedule row with its `view_count`."
function update_schedule_with_viewcount(db, view_count::Int, id::Int)
    try
        sql_update = "UPDATE schedule SET view_count = ? WHERE id = ?;"
        DBInterface.execute(db, sql_update, [view_count, id])
    catch e
        @debug e
        return e
    end
end

"""
Cancel a schedule, and replace the entry with a new caster (or none at all).

If there's no new caster to fill in, pass in `missing` as the `new_caster_id`.
"""
function cancel_schedule(db, schedule_id, new_caster_id)
    sql_insert = "INSERT INTO cancellation SELECT id, caster_id, segment_id, jst FROM schedule WHERE id = ?"
    sql_update = "UPDATE schedule SET caster_id = ? WHERE id = ?"
    try
        return DBInterface.transaction(db) do
            DBInterface.execute(db, sql_insert, [schedule_id])
            DBInterface.execute(db, sql_update, [new_caster_id, schedule_id])
        end
    catch e
        @debug e
        return e
    end
end

"""
Load the entire schedule into a DataFrame
"""
function load_schedule_joined(db)
    sql_select = """
    SELECT s.id         AS id,
           seg.n        AS segment,
           c.n          AS caster,
           s.video_id   AS video_id,
           s.view_count AS view_count,
           m.val        AS mscale,
           s.jst        AS jst
      FROM schedule s
           JOIN      segment seg ON seg.id = s.segment_id
           LEFT JOIN caster  c   ON c.id   = s.caster_id
           LEFT JOIN mscale  m   ON m.jst  = s.jst
     ORDER BY jst
    """
    s = DBInterface.execute(db, sql_select) |> DataFrame
    s.jst = jst2zdt.(s.jst)
    return s
end

#=

using WeatherNews
using WeatherNews: API, DB
using DataFrames
using DataFramesMeta
using SQLite
using Statistics

db = SQLite.DB("sql/wn.db")
s = DB.load_schedule_joined(db)

# Try finding the median instead of the average.
rs = @chain s begin
    @subset(:segment .!= "au PAY")
    dropmissing(:view_count)
    dropmissing(:caster)
    @subset(:view_count .!= 0)
    @select(:caster, :view_count)
    @groupby(:caster)
    @combine begin
        :median_views = median(:view_count)
        :average_views = mean(:view_count)
    end
    sort(:median_views, rev=true)
end

# Summarize views by day
tf = Day(1)
rs = @chain s begin
    dropmissing(:view_count)
    @subset(:view_count .!= 0)
    @subset(hour.(:jst) .!= 23)
    @transform(:jst2 = floor.(:jst, tf))
    @groupby(:jst2)
    @combine begin
        :views = sum(:view_count)
    end
    @subset(:views .> 100000)
    @select(:jst = :jst2, :views)
end

=#