How to map the values of one category to another category in the same variable?

r
transformations

#1

I am trying to merge one category of a variable into another category of the same variable (for example, relabel 'LF' as 'Low Fat').
here is my code:
Train_Mart$Item_Fat_Content<- recode(Train_Mart$Item_Fat_Content, "c('LF','Low Fat') = 'Low Fat'")
here is the error:
Error: All replacements must be named
Data:
Train_UWu5bXk.csv (849.2 KB)
I have also uploaded the data.
Thanks in advance


#2

@siddhant96goswami
The code worked fine for me on the train data that you gave. Check whether you made a mistake earlier in your script. Share the full code if the error pops up again.


#3

#Big Mart Sales prediction:- The aim is to find out the sales of each product at a particular store
#Loading the libraries
library(readr)
library(ggplot2)
library(scales)
library(randomForest)
library(dplyr)
library(mlr)
library(gmodels)
library(rpart)
library(stats)
library(Amelia)
library(mice)
# Loading and checking the data
# The paths must be delimited by plain ASCII double quotes; typographic
# (curly) quotes are not valid string delimiters in R.
# NOTE(review): these are absolute, machine-specific paths; adjust them to
# wherever the CSV files live locally.
Train_Mart <- read_csv("D:/Alyss_AnalyticsReport/Big_Mart_Sales_Prediction/Train_UWu5bXk.csv")
Test_Mart <- read_csv("D:/Alyss_AnalyticsReport/Big_Mart_Sales_Prediction/Test_u94Q5KV.csv")
str(Train_Mart)
# Step 1: Univariate Analysis
# The categorical variables are: Item_Fat_Content, Item_Type,
# Outlet_Establishment_Year, Outlet_Size, Outlet_Location_Type, Outlet_Type
# The continuous variables are: Item_Weight, Item_Visibility, Item_MRP
# Combining categorical variables
Train_Mart_cat <- subset(
  Train_Mart,
  select = c(
    Item_Fat_Content, Item_Type, Outlet_Establishment_Year,
    Outlet_Size, Outlet_Location_Type, Outlet_Type
  )
)
Train_Mart_cont <- subset(
  Train_Mart,
  select = c(Item_Weight, Item_Visibility, Item_MRP)
)
# Number of distinct values in each categorical column. vapply() walks the
# columns directly and guarantees an integer result; apply() would first
# coerce the whole data frame to a character matrix.
vapply(Train_Mart_cat, function(x) length(unique(x)), integer(1))
colSums(is.na(Train_Mart)) # Checking NA values in the train dataset
colSums(is.na(Test_Mart))
as.matrix(prop.table(table(Train_Mart$Item_Type)))
summary(Train_Mart)
# Analyzing Item_Type
table(Train_Mart$Item_Type)
# Print the proportions in decreasing order (rounded to 3 digits, first 16)
head(round(sort(prop.table(table(Train_Mart$Item_Type)), decreasing = TRUE), 3), 16)
# We will keep the observation in mind
# Same for Item_Fat_Content (rounded to 6 digits, head() default row count)
head(round(sort(prop.table(table(Train_Mart$Item_Fat_Content)), decreasing = TRUE), 6))
# After evaluating each variable, we now move to the next step
# Step 2: Multivariate Analysis
# All string literals below use plain ASCII quotes; typographic (curly) quotes
# are not valid string delimiters in R.
# Boxplot of Item_Weight by Item_Type to look for a relation between them
ggplot(Train_Mart, aes(Item_Type, Item_Weight)) +
  geom_boxplot() +
  labs(title = "Boxplot")
# Outlet_Establishment_Year is presumably parsed as a number by read_csv();
# wrapping it in factor() gives one box per year rather than a single box
# over a continuous axis. The x label is set explicitly to keep the column name.
ggplot(Train_Mart, aes(factor(Outlet_Establishment_Year), Item_Outlet_Sales)) +
  geom_boxplot() +
  labs(title = "Boxplot", x = "Outlet_Establishment_Year")
CrossTable(Train_Mart$Outlet_Location_Type, Train_Mart$Outlet_Type)
cat(paste("It is clear from the table that Tier1 cities have more Supermarket Type 1 than grocery store and dont have rest of the types of Supermarket"))
cat(paste("Tier2 cities only have Supermarket Type 2…Looks like Tier 2 citizens are much richer than Tier1"))
cat(paste("Tier3 cities have 935 Type 3, 928 Type2 ,932 Type 1supermarket and 555 Grocery stores"))
# This shows that Tier 2 cities should have better sales because of Type1 supermarket but Tier3 has maximum number of stores
# Let's find out the relation between type of item and its sales
ggplot(Train_Mart, aes(Item_Type, Item_Outlet_Sales)) +
  geom_boxplot() +
  labs(title = "Item_Type and Sales")
# The sales of most of the items are almost the same; however seafood, snacks
# and starchy foods have a somewhat larger distribution
# Now let's take a look at location type versus item type
ggplot(data = Train_Mart, aes(Outlet_Location_Type, Item_Type)) +
  geom_jitter() +
  labs(title = "Scatterplot") # Well, nothing significant found
# Plotting a bar chart of outlet type, filled by Item_Outlet_Sales
# NOTE(review): geom_bar() counts rows per Outlet_Type, so the y axis shows a
# count even though it is labelled "Item_Outlet_Sales" — confirm this is the
# intended chart.
ggplot(Train_Mart, aes(Outlet_Type, fill = Item_Outlet_Sales)) +
  geom_bar() +
  labs(title = "Stacked Bar Chart", x = "Outlet_Type", y = "Item_Outlet_Sales") +
  theme_bw()
# This clearly shows that "Supermarket type 1" boosts most of the sales
# Step 3: Missing Value Treatment
# String literals below use plain ASCII quotes; typographic (curly) quotes are
# not valid string delimiters in R.
colSums(is.na(Train_Mart))
# Item_Weight and Outlet_Size have NA values in the Train dataset
# To replace NA values we will use the Amelia GUI
AmeliaView()
# We will store the new values in the Train_Mart dataset
# NOTE(review): Train_UWu5bXk_imp1 is not created anywhere in this script;
# presumably it is the imputed data set produced by the AmeliaView() session —
# confirm it exists before running the lines below.
sample <- Train_UWu5bXk_imp1$Item_Weight
Train_Mart$Item_Weight <- sample
factor_vars <- c(
  "Item_Fat_Content", "Item_Type", "Outlet_Size",
  "Outlet_Location_Type", "Outlet_Type"
)
# Convert the categorical columns to factors before imputing with mice
Train_Mart[factor_vars] <- lapply(Train_Mart[factor_vars], as.factor)
# NOTE(review): "fastpmm" may not be available in every mice version; verify
# against the installed release (plain "pmm" is the usual choice).
tempdata <- mice(Train_Mart, m = 5, method = "fastpmm", maxit = 50, seed = 500)
mice_output <- complete(tempdata)
Train_Mart$Outlet_Size <- mice_output$Outlet_Size
# This completes our missing value treatment for the Train dataset
# The same method will be applied to the Test dataset
AmeliaView()
# NOTE(review): Test_u94Q5KV_imp1 is likewise expected to come from the
# AmeliaView() session — confirm.
Test_Mart$Item_Weight <- Test_u94Q5KV_imp1$Item_Weight
Test_Mart[factor_vars] <- lapply(Test_Mart[factor_vars], as.factor)
tempdata <- mice(Test_Mart, m = 5, method = "fastpmm", maxit = 50, seed = 500)
mice_output <- complete(tempdata)
Test_Mart$Outlet_Size <- mice_output$Outlet_Size
# Boxplot of the (imputed) Item_Weight per Item_Identifier
ggplot(Train_Mart, aes(Item_Identifier, Item_Weight)) +
  geom_boxplot() +
  labs(title = "Boxplot")
# Step 5: Variable Transformation
# Merge the "LF" label into "Low Fat".
# With dplyr attached, a bare recode() resolves to dplyr::recode(), which
# expects named replacements of the form old = new. A car-style spec string
# such as "c('LF','Low Fat') = 'Low Fat'" is a single unnamed argument and
# fails with "Error: All replacements must be named". The call is namespaced
# so it does not depend on which package masks recode().
Train_Mart$Item_Fat_Content <- dplyr::recode(
  Train_Mart$Item_Fat_Content,
  "LF" = "Low Fat"
)
# This is the code