In Memory Relational Database Project

RelSchema.idr
-------------

module RelSchema

import Data.Vect

public export
Str2Type : String -> Type
Str2Type "INTEGER" = Integer
Str2Type "DECIMAL" = Double
Str2Type "VARCHAR" = String
Str2Type _         = Void

-- A Heterogeneous Vector: a list where each element's type
-- is determined by the corresponding type in 'ts'
public export
data Row : (ts : Vect n Type) -> Type where
  Nil  : Row []
  (::) : {t : Type} -> (x : t) -> Row ts -> Row (t :: ts)

-- Helper to convert your String-based schema to actual Types
public export
SchemaTypes : Vect n String -> Vect n Type
SchemaTypes [] = []
SchemaTypes (s :: ss) = Str2Type s :: SchemaTypes ss

public export
data RelationType : (n : Nat) -> (cnames : Vect n String) -> (ctypes : Vect n String) -> Type where
  MkRel : (rname : String) -> 
          (cnames : Vect n String) ->  -- Brought back to runtime
          (ctypes : Vect n String) ->  -- Brought back to runtime
          (rows : List (Row (SchemaTypes ctypes))) -> 
          RelationType n cnames ctypes

public export
getRname : {n : Nat} -> {cnames : Vect n String} -> {ctypes : Vect n String} -> 
           RelationType n cnames ctypes -> String
getRname (MkRel rname _ _ _) = rname

public export
getCnames : {n : Nat} -> {cnames : Vect n String} -> {ctypes : Vect n String} ->
            RelationType n cnames ctypes -> Vect n String
getCnames (MkRel _ cnames _ _) = cnames

public export
getCtypes : {n : Nat} -> {cnames : Vect n String} -> {ctypes : Vect n String} ->
            RelationType n cnames ctypes -> Vect n String
getCtypes (MkRel _ _ ctypes _) = ctypes

public export
getRows : {n : Nat} -> {cnames : Vect n String} -> {ctypes : Vect n String} ->
          RelationType n cnames ctypes -> List (Row (SchemaTypes ctypes))
getRows (MkRel _ _ _ rows) = rows

showSchema : {n : Nat} -> Vect n String -> Vect n String -> String
showSchema [] [] = ""
showSchema (c::cs) (t::ts) = c++":"++t++","++(showSchema cs ts)

public export
showVal : (s : String) -> Str2Type s -> String
showVal "INTEGER" x = show x
showVal "DECIMAL" x = show x
showVal "VARCHAR" x = substr 1 (minus (length (show x)) 2) (show x)
showVal _         _ = "UNKNOWN"

-- Prints a single row
public export
showRow : {n : Nat} -> (ctypes : Vect n String) -> Row (SchemaTypes ctypes) -> String
showRow [] Nil = ""
showRow (c :: cs) (x :: xs) =
  let strX = showVal c x
      strXs = showRow cs xs
  in case strXs of
       "" => strX
       _  => strX ++ ":" ++ strXs

-- Prints all rows in the list
public export
showTuples : {n : Nat} -> (ctypes : Vect n String) -> List (Row (SchemaTypes ctypes)) -> String
showTuples _ [] = ""
showTuples ctypes (r :: rs) = showRow ctypes r ++ "\n" ++ showTuples ctypes rs

public export
showRelation : {n : Nat} -> {cnames : Vect n String} -> {ctypes : Vect n String} ->
               Maybe (RelationType n cnames ctypes) -> String
showRelation Nothing = "\nNothing\n\n"
showRelation (Just (MkRel rname runtimeCnames runtimeCtypes rows)) =
    let schemaStr = showSchema runtimeCnames runtimeCtypes
        trimmedSchema = substr 0 (minus (length schemaStr) 1) schemaStr
    in "\n" ++ rname ++ "(" ++ trimmedSchema ++ ")\n" ++
       "Number of tuples: " ++ show (length rows) ++ "\n\n" ++
       showTuples runtimeCtypes rows

RelAlgebra.idr
--------------

module RelAlgebra

import RelSchema
import Data.Vect
import Data.Maybe

-- UNION --
public export
union : {n : Nat} -> {cnames1 : Vect n String} -> {ctypes : Vect n String} -> {cnames2 : Vect n String} ->
        RelationType n cnames1 ctypes -> RelationType n cnames2 ctypes -> RelationType n cnames1 ctypes
union (MkRel rname1 cnames1 ctypes rows1) (MkRel rname2 cnames2 ctypes rows2) =
  MkRel (rname1 ++ "_union_" ++ rname2) cnames1 ctypes (rows1 ++ rows2)

-- PROJECT --
getIndices : {m : Nat} -> {n : Nat} -> Vect m String -> Vect n String -> Maybe (Vect m (Fin n))
getIndices Nil _ = Just Nil
getIndices (c :: cs) rCols = do
  idx <- elemIndex c rCols
  rest <- getIndices cs rCols
  pure (idx :: rest)

public export
||| Extracts the i-th value from a Row.
||| The return type is calculated automatically based on the schema.
getColValue : (i : Fin n) -> Row ts -> index i ts
getColValue FZ (x :: xs) = x
getColValue (FS k) (x :: xs) = getColValue k xs

-- produces output types for projected relation
public export
lookupType : String -> Vect n String -> Vect n String -> Maybe String
lookupType _ [] [] = Nothing
lookupType targetName (c :: cs) (t :: ts) =
  if targetName == c
     then Just t
     else lookupType targetName cs ts

-- Unwraps the Maybe at compile time
safeLookup : String -> Vect n String -> Vect n String -> String
safeLookup col cnames ctypes = fromMaybe "UNKNOWN" (lookupType col cnames ctypes)

getOutTypes : {n : Nat} -> Vect m String -> Vect n String -> Vect n String -> Vect m String
getOutTypes [] _ _ = []
getOutTypes (c::cs) inCols inTypes = (safeLookup c inCols inTypes) :: getOutTypes cs inCols inTypes

public export
-- Helper: Projects a single row based on the new types and the found indices
projectRow : {m : Nat} -> {n : Nat} -> (inTypes : Vect n String) ->
             (outTypes : Vect m String) -> (indices : Vect m (Fin n)) ->
             Row (SchemaTypes inTypes) -> Row (SchemaTypes outTypes)
projectRow _ [] [] _ = Nil
projectRow _ (t :: ts) (idx :: idxs) r =
  let
      -- 1. Extract the raw value using the Fin index
      extractedVal = getColValue idx r

      -- 2. Force the type match using the new column type 't'
      typedVal : Str2Type t = believe_me extractedVal
  in
      typedVal :: projectRow _ ts idxs r

-- Main Project Function
public export
project : {m : Nat} -> {n : Nat} -> {inNames : Vect n String} -> {inTypes : Vect n String} ->
          (outNames : Vect m String) -> (inR : RelationType n inNames inTypes) -> 
          Maybe (RelationType m outNames (getOutTypes outNames (getCnames inR) (getCtypes inR)))
project outNames (MkRel rname cnames ctypes rows) = do
  -- 1. Find the indices of the requested column names
  indices <- getIndices outNames cnames

  -- 2. Project every row using our helper function and the new types
  let projectedRows = map (projectRow ctypes (getOutTypes outNames cnames ctypes) indices) rows

  -- 3. Return the newly formed relation using the MkRel constructor
  pure (MkRel (rname ++ "_proj") outNames (getOutTypes outNames cnames ctypes) projectedRows)


Main.idr
--------

import RelSchema
import RelAlgebra
import Data.Vect

-- Defining a relation
r1 : RelationType 3 ["SID", "SNAME", "MAJOR"] ["INTEGER", "VARCHAR", "VARCHAR"]
r1 = MkRel "students1" ["SID", "SNAME", "MAJOR"] ["INTEGER", "VARCHAR", "VARCHAR"]
           [1000::"Jones"::"CSC"::Nil, 1001::"Smith"::"MATH"::Nil]

r2 : RelationType 3 ["SID2", "SNAME2", "MAJOR2"] ["INTEGER", "VARCHAR", "VARCHAR"]
r2 = MkRel "students2" ["SID2", "SNAME2", "MAJOR2"] ["INTEGER", "VARCHAR", "VARCHAR"]
           [2000::"Charles"::"CSC"::Nil, 2001::"David"::"MATH"::Nil]
           
main : IO ()
main = do
  putStrLn("r1 =")
  putStrLn (showRelation (Just r1))
  putStrLn("project [\"SID\",\"MAJOR\"] r1 =")
  putStrLn (showRelation (project ["SID","MAJOR"] r1))