How to map the values of one category to another category in the same variable?



I am trying to merge the observations of one category of a variable into another category of the same variable (for example, relabel 'LF' as 'Low Fat').
here is my code:
Train_Mart$Item_Fat_Content<- recode(Train_Mart$Item_Fat_Content, "c('LF','Low Fat') = 'Low Fat'")
here is the error:
Error: All replacements must be named
Train_UWu5bXk.csv (849.2 KB)
I have also uploaded the data.
Thanks in advance


The code worked fine for me on the train data that you gave. Check whether you made a mistake earlier in your script, and share the full code if the error pops up again.


#Big Mart Sales prediction:- The aim is to find out the sales of each product at a particular store
# Loading the libraries
# The calls used further down (read_csv, ggplot, mice) need these packages
# attached before anything else runs.
library(readr)
library(ggplot2)
library(mice)
# Loading and checking the data
# String literals must be delimited by straight ASCII quotes; typographic
# (curly) quotes are not valid string delimiters in R and fail to parse.
Train_Mart <- read_csv(
  "D:/Alyss_AnalyticsReport/Big_Mart_Sales_Prediction/Train_UWu5bXk.csv"
)
Test_Mart <- read_csv(
  "D:/Alyss_AnalyticsReport/Big_Mart_Sales_Prediction/Test_u94Q5KV.csv"
)
# Step 1: Univariate Analysis
# The categorical variables are: FatContent, ItemType, EstablishmentYear, OutletSize, LocationType, OutletType
# The continuous variables are: Item_Weight, Item_Visibility, Item_MRP
# Combining categorical variables
Train_Mart_cat <- subset(Train_Mart, select = c(Item_Fat_Content, Item_Type, Outlet_Establishment_Year, Outlet_Size, Outlet_Location_Type, Outlet_Type))
Train_Mart_cont <- subset(Train_Mart, select = c(Item_Weight, Item_Visibility, Item_MRP))
# Number of distinct values in each categorical column. vapply() walks the
# columns directly and guarantees an integer result; apply() would first
# coerce the whole data frame to a matrix.
vapply(Train_Mart_cat, function(x) length(unique(x)), integer(1))
# NA values in dataset
# NOTE(review): the original line was truncated ("colSums( NA values in dataset")
# and did not parse; a per-column NA count is the presumed intent -- confirm.
colSums(is.na(Train_Mart))
# Analyzing Item_Type
# Print the share of each item type in decreasing order, rounded to 3 digits
# (at most 16 entries are shown)
head(
  round(
    sort(prop.table(table(Train_Mart$Item_Type)), decreasing = TRUE),
    digits = 3
  ),
  n = 16
)
# We will keep the observation in mind
# Same breakdown for Item_Fat_Content, rounded to 6 digits
head(
  round(
    sort(prop.table(table(Train_Mart$Item_Fat_Content)), decreasing = TRUE),
    digits = 6
  )
)
#After evaluating each variable , now we moved to next step
#Step 2:Multivariate Analysis
# Box plot of Item_Weight per Item_Type to look for a relation between them
# (string literals use straight quotes; curly quotes do not parse in R)
ggplot(Train_Mart, aes(Item_Type, Item_Weight)) +
  geom_boxplot() +
  labs(title = "Boxplot")
# Box plot of Item_Outlet_Sales per establishment year.
# Outlet_Establishment_Year is presumably parsed as a number by read_csv();
# wrapping it in factor() gives one box per year instead of a single box
# drawn over a continuous axis. The x label is set back to the column name.
ggplot(Train_Mart, aes(factor(Outlet_Establishment_Year), Item_Outlet_Sales)) +
  geom_boxplot() +
  labs(title = "Boxplot", x = "Outlet_Establishment_Year")
# Observations on outlet types per city tier, printed one per line.
# The original cat(paste(...)) calls emitted no trailing newline, so the three
# messages ran together on the console; writeLines() terminates each one.
# Straight quotes replace the typographic quotes, which do not parse in R.
writeLines(c(
  "It is clear from the table that Tier1 cities have more Supermarket Type 1 than grocery store and dont have rest of the types of Supermarket",
  "Tier2 cities only have Supermarket Type 2…Looks like Tier 2 citizens are much richer than Tier1",
  "Tier3 cities have 935 Type 3, 928 Type2 ,932 Type 1supermarket and 555 Grocery stores"
))
# This shows that Tier 2 cities should have better sales because of Type1 supermarket but Tier3 has maximum number of stores
# Let's find out the relation between the type of item and its sales
# (string literals use straight quotes; curly quotes do not parse in R)
ggplot(Train_Mart, aes(Item_Type, Item_Outlet_Sales)) +
  geom_boxplot() +
  labs(title = "Item_Type and Sales")
# The sales of most of the items are almost the same; however, seafood, snacks and starchy foods have a slightly larger distribution
# Now let's take a look at location type versus item type
ggplot(data = Train_Mart, aes(Outlet_Location_Type, Item_Type)) +
  geom_jitter() +
  labs(title = "Scatterplot") # Well, nothing significant found
# Plotting a stacked bar chart of Item_Outlet_Sales per Outlet_Type.
# Item_Outlet_Sales is mapped to y and drawn with stat = "identity", so each
# bar's height is the summed sales of that outlet type. The original mapped
# the sales column to `fill` on a default (count) bar, which plots the number
# of rows while the y axis was labelled as sales.
ggplot(Train_Mart, aes(Outlet_Type, Item_Outlet_Sales)) +
  geom_bar(stat = "identity") +
  labs(title = "Stacked Bar Chart", x = "Outlet_Type", y = "Item_Outlet_Sales") +
  theme_bw()
# This clearly shows that "Supermarket type 1" boosts most of the sales
# Step 3: Missing Value Treatment
# Item_Weight and Outlet_Size have NA values in Train dataset
# To replace NA values we will use the Amelia function
# We will store the new values in the Train_Mart dataset
# NOTE(review): Train_UWu5bXk_imp1 and Test_u94Q5KV_imp1 are not created
# anywhere in this script; presumably they are Amelia imputation outputs
# loaded beforehand -- confirm.
# The imputed weights are assigned directly; the original went through a
# temporary variable named `sample`, which shadows base::sample.
Train_Mart$Item_Weight <- Train_UWu5bXk_imp1$Item_Weight
# Columns converted to factors before running mice
factor_vars <- c(
  "Item_Fat_Content", "Item_Type", "Outlet_Size",
  "Outlet_Location_Type", "Outlet_Type"
)
Train_Mart[factor_vars] <- lapply(Train_Mart[factor_vars], as.factor)
# NOTE(review): the "fastpmm" method may be unavailable in newer mice
# releases -- confirm against the installed version ("pmm" is the usual
# predictive-mean-matching method).
tempdata <- mice(Train_Mart, m = 5, method = "fastpmm", maxit = 50, seed = 500)
# Namespaced so that tidyr::complete() cannot mask it if tidyr is attached
mice_output <- mice::complete(tempdata)
Train_Mart$Outlet_Size <- mice_output$Outlet_Size
# This completes our missing value treatment for Train dataset
# Same method will be applied for Test dataset
Test_Mart$Item_Weight <- Test_u94Q5KV_imp1$Item_Weight
Test_Mart[factor_vars] <- lapply(Test_Mart[factor_vars], as.factor)
tempdata <- mice(Test_Mart, m = 5, method = "fastpmm", maxit = 50, seed = 500)
mice_output <- mice::complete(tempdata)
Test_Mart$Outlet_Size <- mice_output$Outlet_Size
# Box plot of Item_Weight per Item_Identifier
# (the title uses straight quotes; curly quotes do not parse in R)
ggplot(Train_Mart, aes(Item_Identifier, Item_Weight)) +
  geom_boxplot() +
  labs(title = "Boxplot")
#Step 5:Variable Transformation
# Merge the "LF" spelling of Item_Fat_Content into "Low Fat".
# The spec-string form recode(x, "c('LF','Low Fat') = 'Low Fat'") is the
# car::recode syntax. The error "All replacements must be named" is the one
# dplyr::recode raises for an unnamed replacement, so presumably dplyr was
# attached after car and its recode() masked car's -- confirm with
# conflicts() or by calling car::recode(...) explicitly.
# The relabelling below is done in base R, so it does not depend on which
# package's recode() is on the search path.
fat_content <- Train_Mart$Item_Fat_Content
was_factor <- is.factor(fat_content)
fat_content <- as.character(fat_content)
# %in% never yields NA, so missing values are left untouched
fat_content[fat_content %in% "LF"] <- "Low Fat"
# Keep the column's original type: factor in, factor out
Train_Mart$Item_Fat_Content <- if (was_factor) factor(fat_content) else fat_content
# This is the code (the recode() call formerly here raised the error)