# Import libraries
library(tidyverse)
library(stringr)
Custom functions
This section contains the list of custom functions that have been written to prepare, handle and visualize the data exported from the database.
Libraries
Archaeobotany
archaeobotany_tables()
This function has two arguments:
- a dataframe of the exported table of plants from the database (view_archaeobot.csv), which has the following columns:
 [1] "site_code" "site_name"
 [3] "type_name" "region_name"
 [5] "data_valid_start" "data_valid_end"
 [7] "weight" "sampling_notes"
 [9] "extra_notes" "short_ref"
[11] "culture_type" "x"
[13] "y" "Triticum.aestivum.durum"
[15] "Triticum.dicoccum" "Triticum.monococcum"
[17] "Avena.sp" "Hordeum.vulgare"
[19] "Panicum.milliaceum" "Secale.cereale"
[21] "Setaria.italica" "Sorghum.bicolor"
[23] "Cerealia.ind" "Leguminosae"
[25] "Lens.culinaris" "Pisum.sativum"
[27] "Vicia.faba" "Vicia.sativa"
[29] "Vicia.sp" "Lathyrus.cicera.sativus"
[31] "Cicer.aretinum" "Cornus.mas"
[33] "Corylus.avellana" "Ficus.carica"
[35] "Fragaria.vesca" "Juglans.regia"
[37] "Castanea.sativa" "Malus.domestica"
[39] "Olea.europaea.L" "Prunus.cerasus"
[41] "Prunus.avium" "Prunus.sp"
[43] "Prunus.persica" "Prunus.domestica"
[45] "Prunus.spinosa" "Rubus.fruticosus"
[47] "Pyrus.communis" "Sambucus.nigra"
[49] "Cucumis.melo" "Vitis.vinifera"
[51] "Linum.usatissimus" "Sorbus.sp"
- the century of interest.
The function archaeobotany_tables()
can be used to return the ubiquity, relative proportions or a print of the table with the sites from the chosen century. The comments in the code below explain the process.
Show the code
## FUNCTION FOR GENERATING CENTURY BASED
# - UBIQUITY
# - RELATIVE PROPORTIONS
# - A PRINT OF THE TABLE
#
# Arguments:
#   x               Data frame exported from the database. It must contain the
#                   columns site_name, type_name, data_valid_start,
#                   data_valid_end and culture_type; every column from
#                   `first_plant_col` to the last one must hold numeric counts.
#   century         Century of interest. A site is kept when
#                   data_valid_start <= century <= data_valid_end.
#   first_plant_col Index of the first column with numerical (taxon) data.
#                   Defaults to 14; adjust it if the table exported from the
#                   database changes.
#
# Returns a list with:
#   Ubiquity_exp  named numeric vector, % of the selected sites where each
#                 taxon is present.
#   Rel_Prop_exp  data frame, each count as a % of its site total (2 decimals).
#   Raw_Counts    data frame with site information, raw counts and row totals.
archaeobotany_tables <- function(x, century, first_plant_col = 14) {
  # Load the tidyverse library if it hasn't been loaded in the page before
  library(tidyverse)

  # Remove NAs
  x[is.na(x)] <- 0

  # Filter the table for the chosen century
  # package: tidyverse
  x <- filter(x, data_valid_start <= century & data_valid_end >= century)

  # Taxon counts only. Subsetting without a comma keeps a data frame even when
  # there is a single taxon column.
  counts <- x[first_plant_col:ncol(x)]

  # The total of each row is needed to calculate the relative proportions
  Total <- rowSums(counts)

  # Subsetting the given dataframe by creating a new dataframe with fewer columns
  plants <- data.frame(
    x$site_name, x$type_name,
    x$data_valid_start, x$data_valid_end,
    x$culture_type, counts,
    Total
  )

  # Calculating the relative proportions and rounding the results to 2 digits.
  # Note: a site whose total is 0 yields NaN in its row (0 / 0).
  Rel_Prop <- round((counts / Total) * 100, digits = 2)

  # Ubiquity:
  # Note: It is given by the no. of sites where the plant is present divided by
  # the total of sites.
  # Presence is read from the raw counts rather than from the rounded
  # percentages: a rare taxon whose share rounds to 0.00 is still present, and
  # a site with a total of 0 must not turn the whole result into NaN.
  Tot_sites_present <- colSums(counts > 0)

  # Note: The score is multiplied by 100 to obtain results in %
  Ubiquity <- (Tot_sites_present / nrow(counts)) * 100

  return(list(
    Ubiquity_exp = Ubiquity,
    Rel_Prop_exp = Rel_Prop,
    Raw_Counts = plants
  ))
}
Rel_Prop_per_Century()
This function has two arguments:
- a dataframe of the exported table of plants from the database (view_archaeobot.csv).
- the century of interest.
The function Rel_Prop_per_Century() can be used to return the relative proportions of each site from the chosen century. The comments in the code below explain the process.
Show the code
## Convert each site raw data into relative proportions
#
# Arguments:
#   x               Data frame exported from the database. It must contain the
#                   columns site_name, type_name, data_valid_start,
#                   data_valid_end, weight, culture_type, x and y; every column
#                   from `first_plant_col` to the last one must hold numeric
#                   counts.
#   century         Century of interest. A site is kept when
#                   data_valid_start <= century <= data_valid_end.
#   first_plant_col Index of the first column with numerical (taxon) data.
#                   Defaults to 14; adjust it if the exported table changes.
#
# Returns a data frame with one row per selected site: the columns Site, Type,
# From.Century, To.Century, Weight, Culture, x, y, followed by one column per
# taxon holding its share of the site total in % (2 decimals).
Rel_Prop_per_Century <- function(x, century, first_plant_col = 14) {
  # Remove NAs
  x[is.na(x)] <- 0

  # Filter the table for the chosen century
  # package: tidyverse
  library(tidyverse)
  x <- filter(x, data_valid_start <= century & data_valid_end >= century)

  # Taxon counts only. Subsetting without a comma keeps a data frame even when
  # there is a single taxon column.
  counts <- x[first_plant_col:ncol(x)]

  # Calculate the total of the row and divide each value by the total to get
  # proportions. round() is used to get two decimal values.
  # Note: a site whose total is 0 yields NaN in its row (0 / 0).
  Total_per_site <- rowSums(counts)
  Rel_Prop_per_site <- round((counts / Total_per_site) * 100, digits = 2)

  # Create new dataframe with the information we need
  plants_rel_prop <- data.frame(
    "Site" = x$site_name,
    "Type" = x$type_name,
    "From.Century" = x$data_valid_start,
    "To.Century" = x$data_valid_end,
    "Weight" = x$weight,
    "Culture" = x$culture_type,
    "x" = x$x,
    "y" = x$y,
    Rel_Prop_per_site
  )

  return(plants_rel_prop)
}
Ubiquity_macroreg_chrono()
This function has three arguments:
- a dataframe of the exported condensed table of plants from the database (Archaeobot_Condensed.csv). It is a table of plants exported with their common English name and with a column of totals for each type of plant (Cereals, Fruit/Nuts, …).
- the macroregion of interest: Southern Italy, Central Italy, Northern Italy.
- the chronology of interest: R, LR, EMA, Ma.
Show the code
# Ubiquity of each plant for a given macroregion and chronology.
#
# Arguments:
#   df           Data frame of the exported condensed table of plants. It must
#                contain the columns Chronology and Macroregion.
#   macroregion  Value of `Macroregion` to keep.
#   chronology   Value of `Chronology` to keep.
#
# Returns a data frame with one row per plant and the columns Chronology,
# Macroregion, Plant, Plant.Type and Ubiquity (% of the selected sites where
# the plant is present, rounded to 2 decimals).
#
# NOTE(review): the column positions dropped below and the 9 / 7 / 22 split of
# plant categories are hard-coded. The function only works when exactly 38
# plant columns remain from column 14 onwards - verify against the current
# export.
Ubiquity_macroreg_chrono <- function(df, macroregion, chronology) {
  # Load the tidyverse library if it hasn't been loaded in the page before
  library(tidyverse)

  # Remove NAs
  df[is.na(df)] <- 0

  # Filter the table for the chosen chronology and macroregion
  # package: tidyverse
  df.chronology <- filter(df, Chronology == chronology & Macroregion == macroregion)

  # Remove useless columns: Tots, unsp.cols
  df.chronology <- df.chronology[-c(23, 24, 32, 33, 56)]

  # Create a counts dataframe where the taxa that are present will be stored as 1
  df.counts <- df.chronology[14:ncol(df.chronology)]
  df.counts[df.counts > 0] <- 1

  # Create a vector with the sum of presences per plant
  df.sites.present <- colSums(df.counts)

  # Calculate ubiquity and round the value to 2 decimals
  # Note: when no site matches the filter this is 0 / 0, i.e. NaN.
  Ubiquity <- (df.sites.present / nrow(df.chronology)) * 100
  Ubiquity <- round(Ubiquity, 2)

  # Add a category that explains what type of plant is it (useful for
  # visualisation): plants 1-9, 10-16 and 17-38 respectively.
  Plants_Type <- data.frame(Type = 1:38)
  Plants_Type$Type[1:9] <- "Cereals"
  Plants_Type$Type[10:16] <- "Pulses"
  Plants_Type$Type[17:38] <- "Fruits/Nuts"

  # Final dataframe that the function will return
  Ubiquity <- cbind.data.frame(
    "Chronology" = chronology,
    "Macroregion" = macroregion,
    "Plant" = names(Ubiquity),
    "Plant.Type" = Plants_Type$Type,
    "Ubiquity" = Ubiquity
  )
  # Drop the plant names that were carried over as row names
  Ubiquity <- data.frame(Ubiquity, row.names = NULL)

  return(Ubiquity)
}
Ubiquity_type_chrono()
This function has three arguments:
- a dataframe of the exported condensed table of plants from the database (Archaeobot_Viz.csv). It is a table of plants exported with their common English name (Cereals, Fruit/Nuts, …).
- the site typology of interest.
- the chronology of interest: R, LR, EMA, Ma.
# Ubiquity for type and chronology
#
# Arguments:
#   df          Data frame of the exported condensed table of plants. It must
#               contain the columns Type and Chronology, and a column named
#               "Common.Wheat" that marks the first plant column; every column
#               from there to the last one must hold numeric counts.
#   type        Value of `Type` (site typology) to keep.
#   chronology  Value of `Chronology` to keep.
#
# Returns a one-row numeric matrix: the row is named after `type`, the columns
# are the plants, and each value is the % of the selected sites where the plant
# is present (rounded to 2 decimals).
Ubiquity_type_chrono <- function(df, type, chronology) {
  # Load the tidyverse library if it hasn't been loaded in the page before
  library(tidyverse)

  # Remove NAs
  df[is.na(df)] <- 0

  # Filter the table for the chosen site type and chronology
  # package: tidyverse
  df.chronology <- filter(df, Type == type & Chronology == chronology)

  # Selecting the first plant column. In this way I will avoid errors if the
  # structure of the data set changes in the future.
  # The lookup is done on the table passed to the function, not on a global
  # object, so the function works with any data frame of the right layout.
  first_col_index <- which(names(df.chronology) == "Common.Wheat")
  if (length(first_col_index) != 1L) {
    stop("`df` must contain exactly one column named \"Common.Wheat\".",
         call. = FALSE)
  }

  # Create a counts dataframe where the taxa that are present will be stored as 1
  df.counts <- df.chronology[first_col_index:ncol(df.chronology)]
  df.counts[df.counts > 0] <- 1

  # Create a vector with the sum of presences per plant
  df.sites.present <- colSums(df.counts)

  # Calculate ubiquity and round the value to 2 decimals
  # Note: when no site matches the filter this is 0 / 0, i.e. NaN.
  Ubiquity <- (df.sites.present / nrow(df.chronology)) * 100
  Ubiquity <- round(Ubiquity, 2)

  # Returning a dataframe
  Ubiquity <- as.data.frame(Ubiquity)

  # Transposing the dataframe to have a single row (this yields a matrix)
  Ubiquity <- t(Ubiquity)

  # Assigning the site type as the row name
  row.names(Ubiquity) <- type

  return(Ubiquity)
}
Zooarchaeology
Medians_Chrono_Zoo()
Code
# Weighted median of the relative proportions of each taxon for one chronology.
#
# Arguments:
#   x       Data frame of faunal remains. It must contain a column Chronology;
#           columns 15 to the last one must hold numeric counts, and column 7
#           is used as the weight of each row (presumably the sample weight -
#           verify against the export).
#   Chrono  Value of `Chronology` to keep.
#
# Returns a one-column data frame named after `Chrono`, with one row per
# numeric column. Proportions are expressed as fractions (0-1), not %.
#
# NOTE(review): `Total` is appended to `x` before the proportions are computed,
# so it falls inside 15:ncol(x) and the result also contains a "Total" row
# (Total / Total). This is kept as-is because callers may rely on the shape.
Medians_Chrono_Zoo <- function(x, Chrono) {
  library(tidyverse)
  library(matrixStats)

  # Keep only the rows of the chosen chronology
  x <- filter(x, Chronology == Chrono)

  # Get rid of NAs
  x[is.na(x)] <- 0

  # Row-wise total, needed to calculate the relative proportions
  x$Total <- rowSums(x[, 15:ncol(x)])
  Rel_Prop <- round(x[, 15:ncol(x)] / x$Total, digits = 2)

  # Absent taxa are turned into NA so that they are ignored by the median
  Rel_Prop[Rel_Prop == 0] <- NA

  # Column-wise median weighted by column 7
  medians <- apply(Rel_Prop, 2, weightedMedian, w = x[, 7], na.rm = TRUE)

  final_medians <- data.frame(Chrono = medians)
  colnames(final_medians) <- Chrono

  return(final_medians)
}
zooarch_tables_general()
This function has one argument: a dataframe of the exported condensed table of faunal remains from the database (Zooarch_Condensed.csv
). It is a table of animals exported with their common English name, columns with context information. Some of the columns are the sum of different columns, with the SQL
function COALESCE()
. For instance, the column Edible Wild Mammals
is the sum of values for red deer, roe deer, hare, wild boar.
Show the code
# Relative proportions, means and weighted medians for the whole faunal table.
#
# Arguments:
#   x  Data frame of faunal remains. Columns 15 to the last one must hold
#      numeric counts; column 7 is used as the weight of each row for the
#      medians (presumably the sample weight - verify against the export).
#
# Returns a list with:
#   Rel_Prop_exp     columns 2-6 of `x` followed by the relative proportions (%)
#   Rel_Prop_exp_XY  columns 2-14 of `x` followed by the relative proportions
#                    (per the original comment these include the coordinates)
#   Means            unweighted column means of the relative proportions
#   Medians          column medians of the relative proportions, weighted by
#                    column 7
#   RawCounts        `x` without its first column
zooarch_tables_general <- function(x) {
  library(matrixStats)
  library(tidyverse)

  # Remove NAs
  x[is.na(x)] <- 0

  # The total of each row is needed to calculate the relative proportions
  # Note: Calculation starts from column 15 because it is the first column
  # with numerical data.
  # If the table exported from the database changes, this number must be adjusted.
  Total <- rowSums(x[, 15:ncol(x)])

  # Everything but the first column
  zoo_subset <- x[, c(2:ncol(x))]

  # Note: a row whose total is 0 yields NaN (0 / 0), which propagates to Means.
  Rel_Prop <- round((x[, 15:ncol(x)] / Total) * 100, digits = 2)

  Rel_prop_with_sites <- data.frame(zoo_subset[, 1:5], Rel_Prop)
  Rel_prop_xy <- data.frame(zoo_subset[, 1:13], Rel_Prop)

  return(
    list(
      Rel_Prop_exp = Rel_prop_with_sites, # Rel Prop with site information
      Rel_Prop_exp_XY = Rel_prop_xy, # Rel props with coordinates
      Means = colMeans(Rel_Prop), # Column means
      Medians = apply(Rel_Prop, 2, weightedMedian, w = x[, 7], na.rm = TRUE),
      RawCounts = zoo_subset # Original table
    )
  )
}
zooarch_tables()
Show the code
# Relative proportions, weighted means and weighted medians of the faunal
# remains for one century.
#
# Arguments:
#   x        Data frame of faunal remains. It must contain the columns
#            From.Century and To.Century; columns 15 to the last one must hold
#            numeric counts, and column 7 is used as the weight of each row
#            (presumably the sample weight - verify against the export).
#   century  Century of interest. A row is kept when
#            From.Century <= century <= To.Century.
#
# Returns a list with:
#   Rel_Prop_exp     first 6 columns of the subset followed by the relative
#                    proportions (%)
#   Rel_Prop_exp_XY  first 10 columns of the subset followed by the relative
#                    proportions
#   Means            column means of the relative proportions, weighted by
#                    column 7
#   Medians          column medians of the relative proportions, weighted by
#                    column 7
#   RawCounts        the filtered table without columns 1 and 6
zooarch_tables <- function(x, century) {
  library(tidyverse)
  library(matrixStats)

  # Get rid of NAs
  x[is.na(x)] <- 0

  # Need to filter the given table for the century.
  # Bare column names are used: inside filter() `x$...` would break if the
  # table itself had a column called `x`.
  x <- filter(x, From.Century <= century & To.Century >= century)

  # Row wise total for rel prop
  Total <- rowSums(x[, 15:ncol(x)])

  # Drop columns 1 and 6
  zoo_subset <- x[, c(2:5, 7:10, 11:ncol(x))]

  # Note: a row whose total is 0 yields NaN (0 / 0).
  Rel_Prop <- round((x[, 15:ncol(x)] / Total) * 100, digits = 2)

  # Check if it is the 5th or 6th column
  Rel_prop_with_sites <- data.frame(zoo_subset[, 1:6], Rel_Prop)
  # Check if it is to 10 or to 13
  Rel_prop_xy <- data.frame(zoo_subset[, 1:10], Rel_Prop)

  return(
    list(
      Rel_Prop_exp = Rel_prop_with_sites, # Rel Prop by century
      Rel_Prop_exp_XY = Rel_prop_xy,
      # Means = colMeans(Rel_Prop), # Column means, by century
      Means = apply(Rel_Prop, 2, weighted.mean, w = x[, 7], na.rm = TRUE),
      Medians = apply(Rel_Prop, 2, weightedMedian, w = x[, 7], na.rm = TRUE),
      RawCounts = zoo_subset # Original table, by century
    )
  )
}