Custom functions

This section contains the list of custom functions that have been written to prepare, handle and visualize the data exported from the database.

Libraries

# Import libraries
library(tidyverse)
library(stringr)

Archaeobotany

archaeobotany_tables()

This function has two arguments:

a dataframe of the exported table of plants from the database (view_archaeobot.csv).

 [1] "site_code"               "site_name"              
 [3] "type_name"               "region_name"            
 [5] "data_valid_start"        "data_valid_end"         
 [7] "weight"                  "sampling_notes"         
 [9] "extra_notes"             "short_ref"              
[11] "culture_type"            "x"                      
[13] "y"                       "Triticum.aestivum.durum"
[15] "Triticum.dicoccum"       "Triticum.monococcum"    
[17] "Avena.sp"                "Hordeum.vulgare"        
[19] "Panicum.milliaceum"      "Secale.cereale"         
[21] "Setaria.italica"         "Sorghum.bicolor"        
[23] "Cerealia.ind"            "Leguminosae"            
[25] "Lens.culinaris"          "Pisum.sativum"          
[27] "Vicia.faba"              "Vicia.sativa"           
[29] "Vicia.sp"                "Lathyrus.cicera.sativus"
[31] "Cicer.aretinum"          "Cornus.mas"             
[33] "Corylus.avellana"        "Ficus.carica"           
[35] "Fragaria.vesca"          "Juglans.regia"          
[37] "Castanea.sativa"         "Malus.domestica"        
[39] "Olea.europaea.L"         "Prunus.cerasus"         
[41] "Prunus.avium"            "Prunus.sp"              
[43] "Prunus.persica"          "Prunus.domestica"       
[45] "Prunus.spinosa"          "Rubus.fruticosus"       
[47] "Pyrus.communis"          "Sambucus.nigra"         
[49] "Cucumis.melo"            "Vitis.vinifera"         
[51] "Linum.usatissimus"       "Sorbus.sp"

the century of interest.

The function archaeobotany_tables() can be used to return the ubiquity, relative proportions or a print of the table with the sites from the chosen century. The comments in the code below explain the process.

Show the code

##FUNCTION FOR GENERATING CENTURY BASED
# - UBIQUITY
# - RELATIVE PROPORTIONS
# - A PRINT OF THE TABLE

# Century-based summary tables for the archaeobotanical export.
#
# Arguments:
#   x               - data frame exported from the database
#                     (view_archaeobot.csv). It must contain the columns
#                     site_name, type_name, data_valid_start, data_valid_end
#                     and culture_type, followed by the numeric taxon columns.
#   century         - century of interest. A site is kept when
#                     data_valid_start <= century <= data_valid_end.
#   first_taxon_col - position of the first taxon column. Defaults to 14,
#                     the layout of the current export; adjust it if the
#                     table exported from the database changes.
#
# Returns a list with three elements:
#   Ubiquity_exp - named numeric vector: % of the selected sites in which
#                  each taxon is present.
#   Rel_Prop_exp - data frame of relative proportions (%) per site, rounded
#                  to 2 digits (NaN for a site whose total is 0).
#   Raw_Counts   - data frame with the site information, the raw counts and
#                  the row total.
archaeobotany_tables <- function(x, century, first_taxon_col = 14) {
  # Load the tidyverse library if it hasn't been loaded in the page before
  library(tidyverse)

  # Remove NAs
  x[is.na(x)] <- 0

  # Filter the table for the chosen century
  x <- dplyr::filter(x, data_valid_start <= century & data_valid_end >= century)

  # Columns holding the counts of each taxon
  taxon_cols <- first_taxon_col:ncol(x)

  # The total of each row is needed to calculate the relative proportions
  Total <- rowSums(x[taxon_cols])

  # Subsetting the given dataframe by creating a new dataframe with fewer
  # columns
  plants <- data.frame(x$site_name, x$type_name,
                       x$data_valid_start, x$data_valid_end,
                       x$culture_type, x[taxon_cols],
                       Total
  )

  # Calculating the relative proportions and rounding the results to 2 digits
  Rel_Prop <- round((x[taxon_cols] / Total) * 100, digits = 2)

  # Ubiquity: no. of sites where the plant is present divided by the total
  # number of selected sites.
  # Presence is read from the raw counts, not from Rel_Prop: a rounded
  # proportion can be 0 although the taxon is present, and a site with a
  # total of 0 has NaN proportions that would turn every ubiquity into NaN.
  Pres_Abs <- x[taxon_cols] > 0

  # In how many sites is this plant present?
  Tot_sites_present <- colSums(Pres_Abs)

  # Finally calculate ubiquity
  # Note: The score is multiplied by 100 to obtain results in %
  Ubiquity <- (Tot_sites_present / nrow(x)) * 100

  list(
    Ubiquity_exp = Ubiquity,
    Rel_Prop_exp = Rel_Prop,
    Raw_Counts = plants
  )
}

Rel_Prop_per_Century()

This function has two arguments:

- a dataframe of the exported table of plants from the database (view_archaeobot.csv).
- the century of interest.

The function Rel_Prop_per_Century() can be used to return the relative proportions of each site from the chosen century. The comments in the code below explain the process.

Show the code

## Convert each site raw data into relative proportions

# Relative proportions (%) of every taxon, site by site, for one century.
#
# Arguments:
#   x       - data frame exported from the database (view_archaeobot.csv);
#             the taxon counts are read from column 14 onwards.
#   century - century of interest; a site is kept when
#             data_valid_start <= century <= data_valid_end.
#
# Returns a data frame with the site information (Site, Type, From.Century,
# To.Century, Weight, Culture, x, y) followed by one column per taxon holding
# its share of the site total in %, rounded to two decimals.
Rel_Prop_per_Century <- function(x, century) {

  # Missing values are treated as zero
  x <- replace(x, is.na(x), 0)

  # Keep only the sites whose date range covers the chosen century
  # package: tidyverse
  library(tidyverse)
  sites <- filter(x, data_valid_start <= century & data_valid_end >= century)

  # Each count is divided by the total of its row and expressed in %,
  # rounded to two decimals
  taxon_cols <- 14:ncol(sites)
  site_totals <- rowSums(sites[, taxon_cols])
  shares <- round((sites[taxon_cols] / site_totals) * 100, digits = 2)

  # Site information first, relative proportions after it
  data.frame(
    Site = sites$site_name,
    Type = sites$type_name,
    From.Century = sites$data_valid_start,
    To.Century = sites$data_valid_end,
    Weight = sites$weight,
    Culture = sites$culture_type,
    x = sites$x,
    y = sites$y,
    shares
  )
}

Ubiquity_macroreg_chrono()

This function has three arguments:

a dataframe of the exported condensed table of plants from the database (Archaeobot_Condensed.csv). It is a table of plants exported with their common English name and with a column of totals for each type of plant (Cereals, Fruit/Nuts, …).
the macroregion of interest: Southern Italy, Central Italy, Northern Italy.
the chronology of interest: R, LR, EMA, Ma.

Show the code

# Ubiquity (%) of every taxon for one macroregion and one chronology.
#
# Arguments:
#   df          - data frame of the condensed plant export; it must contain
#                 the columns Chronology and Macroregion.
#   macroregion - macroregion to keep (compared with the Macroregion column).
#   chronology  - chronology to keep (compared with the Chronology column).
#
# Returns a data frame with the columns Chronology, Macroregion, Plant,
# Plant.Type and Ubiquity, one row per taxon.
#
# Note: columns are addressed by position (columns 23, 24, 32, 33 and 56 are
# dropped, taxa start at column 14 and 38 taxa are expected). If the exported
# table changes, these numbers must be adjusted.
Ubiquity_macroreg_chrono <- function(df, macroregion, chronology) {

  # Load the tidyverse library if it hasn't been loaded in the page before
  library(tidyverse)

  # Missing values are treated as zero
  df <- replace(df, is.na(df), 0)

  # Keep the sites of the chosen chronology and macroregion
  # package: tidyverse
  selected <- filter(df, Chronology == chronology & Macroregion == macroregion)

  # Drop the columns that are not needed: Tots, unsp.cols
  selected <- selected[-c(23, 24, 32, 33, 56)]

  # Presence/absence table: every count above zero becomes 1
  presence <- selected[14:ncol(selected)]
  presence[presence > 0] <- 1

  # Number of sites in which each taxon is present
  sites_with_taxon <- colSums(presence)

  # Ubiquity in %, rounded to 2 decimals
  ubiquity_pct <- round((sites_with_taxon / nrow(selected)) * 100, 2)

  # Category of each plant, by position (useful for visualisation)
  plant_type <- rep(c("Cereals", "Pulses", "Fruits/Nuts"), times = c(9, 7, 22))

  # Final dataframe that the function will return, with default row names
  result <- data.frame(
    Chronology = chronology,
    Macroregion = macroregion,
    Plant = names(ubiquity_pct),
    Plant.Type = plant_type,
    Ubiquity = ubiquity_pct,
    check.names = FALSE
  )
  rownames(result) <- NULL

  result
}

Ubiquity_type_chrono()

This function has three arguments:

a dataframe of the exported condensed table of plants from the database (Archaeobot_Viz.csv). It is a table of plants exported with their common English name (Cereals, Fruit/Nuts, …).
the site typology of interest.
the chronology of interest: R, LR, EMA, Ma.

# Ubiquity for type and chronology
# Ubiquity (%) of every taxon for one site type and one chronology.
#
# Arguments:
#   df              - data frame of the condensed plant export; it must
#                     contain the columns Type and Chronology, and the plant
#                     columns must be the last columns of the table.
#   type            - site type to keep (compared with the Type column).
#   chronology      - chronology to keep (compared with the Chronology
#                     column).
#   first_plant_col - name of the first plant column. Defaults to
#                     "Common.Wheat".
#
# Returns a one-row matrix: the row is named after `type`, the columns are
# the taxa and the values are the ubiquity in %, rounded to 2 decimals.
Ubiquity_type_chrono <- function(df, type, chronology,
                                 first_plant_col = "Common.Wheat") {
  # Load the tidyverse library if it hasn't been loaded in the page before
  library(tidyverse)

  # Remove NAs
  df[is.na(df)] <- 0

  # Filter the table for the chosen site type and chronology
  df.chronology <- dplyr::filter(df, Type == type & Chronology == chronology)

  # Selecting the first plant column by name. In this way I will avoid errors
  # if the structure of the data set changes in the future.
  # The column is looked up in the table passed to the function, not in an
  # object of the global environment.
  first_col_index <- match(first_plant_col, names(df.chronology))
  if (is.na(first_col_index)) {
    stop("Column '", first_plant_col, "' not found in `df`.", call. = FALSE)
  }

  # Create a counts dataframe where the taxa that are present will be stored
  # as 1
  df.counts <- df.chronology[first_col_index:ncol(df.chronology)]
  df.counts[df.counts > 0] <- 1

  # Number of sites in which each taxon is present
  df.sites.present <- colSums(df.counts)

  # Calculate ubiquity and round the value to 2 decimals
  Ubiquity <- (df.sites.present / nrow(df.chronology)) * 100
  Ubiquity <- round(Ubiquity, 2)

  # Transposing the one-column dataframe to have a single row
  Ubiquity <- t(as.data.frame(Ubiquity))

  # Assigning the site type as the row name
  row.names(Ubiquity) <- type

  Ubiquity
}

Zooarchaeology

Medians_Chrono_Zoo()

Code

# Weighted medians of the relative proportions for one chronology.
#
# Arguments:
#   x      - data frame of the zooarchaeology export; it must contain a
#            Chronology column, the counts are read from column 15 onwards
#            and column 7 is used as the weight of each row.
#   Chrono - chronology to keep (compared with the Chronology column).
#
# Returns a one-column data frame named after `Chrono`, with one row per
# count column plus a row for the appended Total column.
Medians_Chrono_Zoo <- function(x, Chrono) {
  library(tidyverse)
  library(matrixStats)

  # Rows of the requested chronology, with NAs turned into zeros
  period <- filter(x, Chronology == Chrono)
  period <- replace(period, is.na(period), 0)

  # The row total is appended as a new last column *before* the proportions
  # are computed, so the slice below also contains the Total column
  period$Total <- rowSums(period[, 15:ncol(period)])
  shares <- round(period[, 15:ncol(period)] / period$Total, digits = 2)

  # A proportion of zero is treated as missing and ignored by the median
  shares[shares == 0] <- NA

  # Median of every column, weighted by the 7th column of the table
  weighted_medians <- apply(
    shares, 2, weightedMedian,
    w = period[, 7], na.rm = TRUE
  )

  result <- data.frame(Chrono = weighted_medians)
  colnames(result) <- Chrono
  result
}

zooarch_tables_general()

This function has one argument: a dataframe of the exported condensed table of faunal remains from the database (Zooarch_Condensed.csv). It is a table of animals exported with their common English names, together with columns of context information. Some of the columns are the sum of different columns, computed with the SQL function COALESCE(). For instance, the column Edible Wild Mammals is the sum of the values for red deer, roe deer, hare and wild boar.

Show the code

# Summary tables for the whole zooarchaeology export (no century filter).
#
# Argument:
#   x - data frame of the condensed faunal export; the counts are read from
#       column 15 onwards and column 7 is used as the weight of each row.
#
# Returns a list with:
#   Rel_Prop_exp    - columns 2 to 6 of `x` followed by the relative
#                     proportions (%), rounded to 2 digits.
#   Rel_Prop_exp_XY - columns 2 to 14 of `x` followed by the relative
#                     proportions.
#   Means           - column means of the relative proportions.
#   Medians         - medians of the relative proportions, weighted by
#                     column 7.
#   RawCounts       - the input table without its first column.
zooarch_tables_general <- function(x) {
  library(matrixStats)
  library(tidyverse)

  # Missing values are treated as zero
  x <- replace(x, is.na(x), 0)

  # Counts start at column 15 because it is the first column with numerical
  # data. If the table exported from the database changes, this number must
  # be adjusted.
  counts <- x[, 15:ncol(x)]

  # Share of every count in the total of its row, in %
  row_totals <- rowSums(counts)
  shares <- round((counts / row_totals) * 100, digits = 2)

  # Everything but the first column of the original table
  context <- x[, 2:ncol(x)]

  list(
    Rel_Prop_exp = data.frame(context[, 1:5], shares),
    Rel_Prop_exp_XY = data.frame(context[, 1:13], shares),
    Means = colMeans(shares),
    Medians = apply(shares, 2, weightedMedian, w = x[, 7], na.rm = TRUE),
    RawCounts = context
  )
}

zooarch_tables()

Show the code

# Century-based summary tables for the zooarchaeology export.
#
# Arguments:
#   x       - data frame of the condensed faunal export; it must contain the
#             columns From.Century and To.Century, the counts are read from
#             column 15 onwards and column 7 is used as the weight of each
#             row.
#   century - century of interest; a row is kept when
#             From.Century <= century <= To.Century.
#
# Returns a list with:
#   Rel_Prop_exp    - first 6 columns of the subset followed by the relative
#                     proportions (%), rounded to 2 digits.
#   Rel_Prop_exp_XY - first 10 columns of the subset followed by the relative
#                     proportions.
#   Means           - means of the relative proportions, weighted by
#                     column 7.
#   Medians         - medians of the relative proportions, weighted by
#                     column 7.
#   RawCounts       - the filtered table without columns 1 and 6.
zooarch_tables <- function(x, century) {
  library(tidyverse)
  library(matrixStats)

  x[is.na(x)] <- 0 # Get rid of NAs

  # Need to filter the given table for the century.
  # The columns are referred to by their bare names: inside filter() the
  # columns of the table take precedence over the function's own variables,
  # so `x$From.Century` would fail when the table has a column called `x`.
  x <- dplyr::filter(x, From.Century <= century & To.Century >= century)

  site_weights <- x[[7]]
  counts <- x[, 15:ncol(x)]

  Total <- rowSums(counts) # Row wise total for rel prop

  # Original table without columns 1 and 6
  zoo_subset <- x[, c(2:5, 7:ncol(x))]

  Rel_Prop <- round((counts / Total) * 100, digits = 2)

  # NOTE(review): the original author was unsure about the two column ranges
  # below (5 or 6 columns; 10 or 13 columns) - confirm against the export.
  Rel_prop_with_sites <- data.frame(zoo_subset[, 1:6], Rel_Prop)
  Rel_prop_xy <- data.frame(zoo_subset[, 1:10], Rel_Prop)

  list(
    Rel_Prop_exp = Rel_prop_with_sites, # Rel Prop by century
    Rel_Prop_exp_XY = Rel_prop_xy,
    Means = apply(Rel_Prop, 2, weighted.mean, w = site_weights, na.rm = TRUE),
    Medians = apply(Rel_Prop, 2, weightedMedian, w = site_weights,
                    na.rm = TRUE),
    RawCounts = zoo_subset # Original table, by century
  )
}