library(tidyverse)
library(MLmetrics)
# Load geopandas_intensive2000.tsv as a two-column table (TID, pandas), sorted
# from the largest `pandas` value down to the smallest.
# Because `col_names` is given explicitly, readr treats the first line of the
# file as data, not as a header.
# NOTE(review): if the file does start with a header line it will show up as a
# data row -- confirm against the file.
pandas <- arrange(
  read_tsv(
    "~/RIDIR/Datasets/AreaInterpolateValidation/geopandas_intensive2000.tsv",
    col_names = c("TID", "pandas")
  ),
  desc(pandas)
)

# Preview the top rows and report how many rows were read.
pandas %>% head()
pandas %>% nrow()
# Load geospark_intensive2000.tsv as a two-column table (TID, spark), sorted
# from the largest `spark` value down to the smallest.
# Because `col_names` is given explicitly, readr treats the first line of the
# file as data, not as a header.
# NOTE(review): if the file does start with a header line it will show up as a
# data row -- confirm against the file.
spark <- arrange(
  read_tsv(
    "~/RIDIR/Datasets/AreaInterpolateValidation/geospark_intensive2000.tsv",
    col_names = c("TID", "spark")
  ),
  desc(spark)
)

# Preview the top rows and report how many rows were read.
spark %>% head()
spark %>% nrow()
# Coverage check: keep every row of `pandas` and attach the `spark` value that
# shares its TID. Rows without a match end up with NA in the `spark` column.
table <- left_join(pandas, spark, by = "TID")
table %>% head()
table %>% nrow()

# First rows whose `spark` value is missing after the join.
table %>%
  filter(is.na(spark)) %>%
  head()
# Comparison table: only TIDs present in both inputs are kept.
table <- inner_join(spark, pandas, by = "TID")

# `diff` is the absolute gap between the two values for each TID.
table <- mutate(table, diff = abs(pandas - spark))

# Largest disagreements first.
table <- arrange(table, desc(diff))

# Preview the worst-matching rows and report how many TIDs were compared.
table %>% head()
table %>% nrow()
# Scatter plot of the joined values: `pandas` on x, `spark` on y, one point per
# row of `table`. The plot is passed to print() explicitly because a ggplot
# object is only drawn by auto-printing at top level; without print() nothing
# is rendered when the script is run through source() or from inside a
# function or loop.
print(ggplot(data = table, aes(x = pandas, y = spark)) + geom_point())
# Agreement metrics between the two columns of `table`. MLmetrics functions
# take (y_pred, y_true): the `spark` values are scored as predictions against
# the `pandas` values as the reference. R2 and RAE depend on that order.
print(paste(
  "R2: ",
  R2_Score(y_pred = table[["spark"]], y_true = table[["pandas"]])
))
print(paste(
  "MAE: ",
  MAE(y_pred = table[["spark"]], y_true = table[["pandas"]])
))
print(paste(
  "MSE: ",
  MSE(y_pred = table[["spark"]], y_true = table[["pandas"]])
))
print(paste(
  "RMSE: ",
  RMSE(y_pred = table[["spark"]], y_true = table[["pandas"]])
))
print(paste(
  "RAE: ",
  RAE(y_pred = table[["spark"]], y_true = table[["pandas"]])
))