I'm now studying R, and now doing project about movie recommend algorithm.
I used movielense 100k data with recommenderlab library, and use these tutorials.
https://mitxpro.mit.edu/asset-v1%3AMITProfessionalX+DSx+2017_T1+type#asset+block#Module4_CS1_Movies.pdf
https://cran.r-project.org/web/packages/recommenderlab/vignettes/recommenderlab.pdf
I've now calculated sparsity, and splited data into train and test data.
And I want to make popularity recommendation code. My code is here:
install.packages("SnowballC")
install.packages("class")
install.packages("dbscan")
install.packages("proxy")
install.packages("recommenderlab")
install.packages("dplyr")
install.packages("tm")
install.packages("reshape2")
library(recommenderlab)
library(dplyr)
library(tm)
library(SnowballC)
library(class)
library(dbscan)
library(proxy)
library(reshape2)
#read data
data<- read.table('C:/Users/ginny/OneDrive/Documents/2018_1/dataanalytics/실습3/ml-100k/u.data')
#####raw data to matrix#####
data.frame2matrix = function(data, rowtitle, coltitle, datatitle,
rowdecreasing = FALSE, coldecreasing = FALSE,
default_value = NA) {
# check, whether titles exist as columns names in the data.frame data
if ( (!(rowtitle%in%names(data)))
|| (!(coltitle%in%names(data)))
|| (!(datatitle%in%names(data))) ) {
stop('data.frame2matrix: bad row-, col-, or datatitle.')
}
# get number of rows in data
ndata = dim(data)[1]
# extract rownames and colnames for the matrix from the data.frame
rownames = sort(unique(data[[rowtitle]]), decreasing = rowdecreasing)
nrows = length(rownames)
colnames = sort(unique(data[[coltitle]]), decreasing = coldecreasing)
ncols = length(colnames)
# initialize the matrix
out_matrix = matrix(NA,
nrow = nrows, ncol = ncols,
dimnames=list(rownames, colnames))
# iterate rows of data
for (i1 in 1:ndata) {
# get matrix-row and matrix-column indices for the current data-row
iR = which(rownames==data[[rowtitle]][i1])
iC = which(colnames==data[[coltitle]][i1])
# throw an error if the matrix entry (iR,iC) is already filled.
if (!is.na(out_matrix[iR, iC])) stop('data.frame2matrix: double entry in data.frame')
out_matrix[iR, iC] = data[[datatitle]][i1]
}
# set empty matrix entries to the default value
out_matrix[is.na(out_matrix)] = default_value
# return matrix
return(out_matrix)
}
#data 열 별로 이름 지정('' 안은 필요에 따라 변경 가능)
colnames(data)<-c('user_id','item_id','rating','timestamp')
#raw 데이터를 matrix로 변환
pre_data = data.frame2matrix(data, 'user_id', 'item_id', 'rating')
#matrix를 realratingmatrix로 변환
target_data<- as(as(pre_data, "matrix"), "realRatingMatrix")
data=data[,-which(names(data) %in% c("timestamp"))]
data
str(data)
summary(data)
hist(data$rating)
write.csv(data,"C:/Users/ginny/OneDrive/Documents/2018_1/dataanalytics/실습
3/u.csv")
Number_Ratings=nrow(data)
Number_Ratings
Number_Movies=length(unique(data$item_id))
Number_Movies
Number_Users=length(unique(data$user_id))
Number_Users
data1=data[data$user_id %in% names(table(data$user_id))
[table(data$user_id)>50],]
Number_Ratings1=nrow(data1)
Number_Movies1=length(unique(data1$item_id))
Number_Users1=length(unique(data1$user_id))
sparsity=((Number_Ratings1)*3*5*100)/((Number_Movies1)*(Number_Users1))
sparsity
install.packages("caTools")
library(caTools)
set.seed(10)
sample=sample.split(data1$rating, SplitRatio=0.75)
train=subset(data1, sample==TRUE)
test=subset(data1, sample==FALSE)
data2<-as.data.frame(data1)
data2
#matrix to realratingmatrix
target_data2<- as(as(pre_data2, "matrix"), "realRatingMatrix")
recommender_models<-recommenderRegistry$get_entry(dataType =
"realRatingMatrix")
recomm_model <- Recommender(data2$rating, method = "POPULAR")
I used data2 realRatingMatrix, but when I run last line, error like this happen:
Error in (function (classes, fdef, mtable) : unable to find an
inherited method for function ‘Recommender’ for signature ‘"integer"’
Can anybody help me what's wrong with it?
Related
I am a beginner in R. I have a row standardized matrix (1542x1542) that I created in excel and saved as a .csv file. I am trying to use the matrix in R to calculate Moran's I. -using the following command:
# Weights Matrix Based on Connectivity
sw <- read.csv(file = "20210929_Weights_Matrix.csv")
sw.2.mat <- as.matrix(sw)
## mat to listw
mat2listw(sw.2.mat)
dnn.2.listw = nb2listw(sw.2.mat, zero.policy=T)
However, when I run the command I get the following errors
sw <- read.csv(file = "20210929_Weights_Matrix.csv")
sw.2.mat <- as.matrix(sw)
mat2listw(sw.2.mat) Error in mat2listw(sw.2.mat) : x must be a square matrix
dnn.2.listw = nb2listw(sw.2.mat, zero.policy=T)
Error in nb2listw(sw.2.mat, zero.policy = T) : Not a neighbours list
When I try to add an additional row in excel, I get the following error in R
sw <- read.csv(file = "20210929_Weights_Matrix.csv")
sw.2.mat <- as.matrix(sw)
## mat to listw
mat2listw(sw.2.mat)
Error in if (any(x < 0)) stop("values in x cannot be negative") :
missing value where TRUE/FALSE needed
dnn.2.listw = nb2listw(sw.2.mat, zero.policy=T)
Error in nb2listw(sw.2.mat, zero.policy = T) : Not a neighbours list
Could someone please help? Is there a possibility I can share my excel?
Every time I run the script it always gives me an error: Error in { : task 1 failed - "could not find function "%>%""
I already check every post on this forum and tried to apply it but no one works.
Please advise any solution.
Please note: I have only 2 cores on my PC.
My code is as follows:
library(dplyr) # For basic data manipulation
library(ncdf4) # For creating NetCDF files
library(tidync) # For easily dealing with NetCDF data
library(ggplot2) # For visualising data
library(doParallel) # For parallel processing
MHW_res_grid <- readRDS("C:/Users/SUDHANSHU KUMAR/Desktop/MTech Project/R/MHW_result.Rds")
# Function for creating arrays from data.frames
df_acast <- function(df, lon_lat){
# Force grid
res <- df %>%
right_join(lon_lat, by = c("lon", "lat")) %>%
arrange(lon, lat)
# Convert date values to integers if they are present
if(lubridate::is.Date(res[1,4])) res[,4] <- as.integer(res[,4])
# Create array
res_array <- base::array(res[,4], dim = c(length(unique(lon_lat$lon)), length(unique(lon_lat$lat))))
dimnames(res_array) <- list(lon = unique(lon_lat$lon),
lat = unique(lon_lat$lat))
return(res_array)
}
# Wrapper function for last step before data are entered into NetCDF files
df_proc <- function(df, col_choice){
# Determine the correct array dimensions
lon_step <- mean(diff(sort(unique(df$lon))))
lat_step <- mean(diff(sort(unique(df$lat))))
lon <- seq(min(df$lon), max(df$lon), by = lon_step)
lat <- seq(min(df$lat), max(df$lat), by = lat_step)
# Create full lon/lat grid
lon_lat <- expand.grid(lon = lon, lat = lat) %>%
data.frame()
# Acast only the desired column
dfa <- plyr::daply(df[c("lon", "lat", "event_no", col_choice)],
c("event_no"), df_acast, .parallel = T, lon_lat = lon_lat)
return(dfa)
}
# We must now run this function on each column of data we want to add to the NetCDF file
doParallel::registerDoParallel(cores = 2)
prep_dur <- df_proc(MHW_res_grid, "duration")
prep_max_int <- df_proc(MHW_res_grid, "intensity_max")
prep_cum_int <- df_proc(MHW_res_grid, "intensity_cumulative")
prep_peak <- df_proc(MHW_res_grid, "date_peak")
I have a question regarding LDA in topicmodels in R.
I created a matrix with documents as rows, terms as columns, and the number of terms in a document as respective values from a data frame. While I wanted to start LDA, I got an Error Message stating "Error in !all.equal(x$v, as.integer(x$v)) : invalid argument type" . The data contains 1675 documents of 368 terms. What can I do to make the code work?
library("tm")
library("topicmodels")
data_matrix <- data %>%
group_by(documents, terms) %>%
tally %>%
spread(terms, n, fill=0)
doctermmatrix <- as.DocumentTermMatrix(data_matrix, weightTf("data_matrix"))
lda_head <- topicmodels::LDA(doctermmatrix, 10, method="Gibbs")
Help is much appreciated!
edit
# Toy Data
documentstoy <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)
meta1toy <- c(3,4,1,12,1,2,3,5,1,4,2,1,1,1,1,1)
meta2toy <- c(10,0,10,1,1,0,1,1,3,3,0,0,18,1,10,10)
termstoy <- c("cus","cus","bill","bill","tube","tube","coa","coa","un","arc","arc","yib","yib","yib","dar","dar")
toydata <- data.frame(documentstoy,meta1toy,meta2toy,termstoy)
So I looked inside the code and apparently the lda() function only accepts integers as the input so you have to convert your categorical variables as below:
library('tm')
library('topicmodels')
documentstoy <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)
meta1toy <- c(3,4,1,12,1,2,3,5,1,4,2,1,1,1,1,1)
meta2toy <- c(10,0,10,1,1,0,1,1,3,3,0,0,18,1,10,10)
toydata <- data.frame(documentstoy,meta1toy,meta2toy)
termstoy <- c("cus","cus","bill","bill","tube","tube","coa","coa","un","arc","arc","yib","yib","yib","dar","dar")
toy_unique = unique(termstoy)
for (i in 1:length(toy_unique)){
A = as.integer(termstoy == toy_unique[i])
toydata[toy_unique[i]] = A
}
lda_head <- topicmodels::LDA(toydata, 10, method="Gibbs")
Could anyone help me with some little problem?
When I plot the frontier I get the following message: "Error in colnames<-(tmp, value = c("targetRisk", "targetReturn")) :
attempt to set 'colnames' on an object with less than two dimensions"(see below for detail). How could I solve this. Thanks a lot.
Portfolio construction & Optimisation
Assets: LUTAX, PFODX,BRGAX,GFAFX,NMSAX,EGINX,IPOYX,SCWFX,FGLDX,PAGEX
Getting monthly returns of the assets
library(quantmod)
library(tseries)
library(timeSeries)
LUTAX <- monthlyReturn((getSymbols("LUTAX",auto.assign=FALSE)[,4]),type = "arithmetic")
colnames(LUTAX) <- c("LUTAX")
PFODX <- monthlyReturn((getSymbols("PFODX",auto.assign=FALSE)[,4]),type = "arithmetic")
colnames(PFODX) <- c("PFODX")
BRGAX <- monthlyReturn((getSymbols("BRGAX",auto.assign=FALSE)[,4]),type = "arithmetic")
colnames(BRGAX) <- c("BRGAX")
GFAFX <- monthlyReturn((getSymbols("GFAFX",auto.assign=FALSE)[,4]),type = "arithmetic")
colnames(GFAFX) <- c("GFAFX")
NMSAX <- monthlyReturn((getSymbols("NMSAX",auto.assign=FALSE)[,4]),type = "arithmetic")
colnames(NMSAX) <- c("NMSAX")
EGINX <- monthlyReturn((getSymbols("EGINX",auto.assign=FALSE)[,4]),type = "arithmetic")
colnames(EGINX) <- c("EGINX")
IPOYX <- monthlyReturn((getSymbols("IPOYX",auto.assign=FALSE)[,4]),type = "arithmetic")
colnames(IPOYX) <- c("IPOYX")
SCWFX <- monthlyReturn((getSymbols("SCWFX",auto.assign=FALSE)[,4]),type = "arithmetic")
colnames(SCWFX) <- c("SCWFX")
FGLDX <- monthlyReturn((getSymbols("FGLDX",auto.assign=FALSE)[,4]),type = "arithmetic")
colnames(FGLDX) <- c("FGLDX")
PAGEX <- monthlyReturn((getSymbols("PAGEX",auto.assign=FALSE)[,4]),type = "arithmetic")
colnames(PAGEX) <- c("PAGEX")
Merging returns of the assets (excluding NA's)
portfolio_returns <- merge(LUTAX, PFODX,BRGAX,GFAFX,NMSAX,EGINX,IPOYX,SCWFX,FGLDX,PAGEX,all=F)
data <- as.timeSeries(portfolio_returns)
Optimisation portfolio
library(fPortfolio)
spec <- portfolioSpec()
setNFrontierPoints <- 25
setSolver(spec) <- "solveRquadprog"
constraints <- c("minW[1:1]=0.12","maxW[1:1]=0.18","minW[2:2]=0.12","maxW[2:2]=0.18",
"minW[3:3]=0.10","maxW[3:3]=0.15","minW[4:4]=0.08","maxW[4:4]=0.12",
"minW[5:5]=0.08","maxW[5:5]=0.12","minW[6:6]=0.05","maxW[6:6]=0.10",
"minW[7:7]=0.05","maxW[7:7]=0.10","minW[8:8]=0.08","maxW[8:8]=0.12",
"minW[9:9]=0.05","maxW[9:9]=0.10","minW[10:10]=0.08","maxW[10:10]=0.12",
"minsumW[c(1:1,2:2)]=0.27","maxsumW[c(1:1,2:2)]=0.33",
"minsumW[c(3:3,4:4,6:6,10:10)]=0.37","maxsumW[c(3:3,4:4,6:6,10:10)]=0.43",
"minsumW[c(5:5,7:7,8:8,9:9)]=0.27","maxsumW[c(5:5,7:7,8:8,9:9)]=0.33",
"maxsumW[c(1:1,2:2,3:3,4:4,5:5,6:6,7:7,8:8,9:9,10:10)]=1")
portfolioConstraints(data,spec,constraints)
frontier<- portfolioFrontier(data,spec,constraints)
print(frontier)
tailoredFrontierPlot(frontier)
After running the last command above I get the following message: "Error in colnames<-(tmp, value = c("targetRisk", "targetReturn")) :
attempt to set 'colnames' on an object with less than two dimensions"
I'm trying to use cor.ci to obtain polychoric correlations with significance tests, but it keeps giving me an error message. Here is the code:
install.packages("Hmisc")
library(Hmisc)
mydata <- spss.get("S-IAT for R.sav", use.value.labels=TRUE)
install.packages('psych')
library(psych)
poly.example <- cor.ci(mydata(nvar = 10,n = 100)$items,n.iter = 10,poly = TRUE)
poly.example
print(corr.test(poly.example$rho), short=FALSE)
Here is the error message it gives:
> library(psych)
> poly.example <- cor.ci(mydata(nvar = 10,n = 100)$items,n.iter = 10,poly = TRUE)
Error in cor.ci(mydata(nvar = 10, n = 100)$items, n.iter = 10, poly = TRUE) :
could not find function "mydata"
> poly.example
Error: object 'poly.example' not found
> print(corr.test(poly.example$rho), short=FALSE)
Error in is.data.frame(x) : object 'poly.example' not found
How can I make it recognize mydata and/or select certain variables from this dataset for the analysis? I got the above code from here:
Polychoric correlation matrix with significance in R
Thanks!
You have several problems.
1) As previously commented upon, you are treating mydata as a function, but you need to treat it as a data.frame. Thus the call should be
poly.example <- cor.ci(mydata,n.iter = 10,poly = TRUE)
If you are trying to just get the first 100 cases and the first 10 variables, then
poly.example <- cor.ci(mydata[1:10,1:100],n.iter = 10,poly = TRUE)
2) Then, you do not want to run corr.test on the resulting correlation matrix. corr.test should be run on the data.
print(corr.test(mydata[1:10,1:100],short=FALSE)
Note that corr.test is testing the Pearson correlation.