I am trying to filter a huge txt file line by line, which pure R is not so good at. So, I wrote a c function that hopefully can speed up the process. Below is a minimum working example of filter.c, just for the demo purpose.
Currently, I have tried .C to do the trick without luck. Here is my attempt.
built filter.so using gcc -shared -o lfilter.so -fPIC filter.c
dyn.load("lfilter.so")
.C("filter", as.character("I1.txt"), as.character("I1.out.txt"), as.character("filter.txt"))
R crashed on me with 3rd step. But unfortunately, I have to stay within R.
Any help or suggestions are welcome.
filter.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define LL 256
int get_row(char *filename)
{
char line[LL];
int i = 0;
FILE *stream = fopen(filename, "r");
while (fgets(line, LL, stream))
{
i++;
}
fclose(stream);
return i;
}
void filter(char *R1_in,
char *R1_out,
char *filter)
{
char R1_line[LL];
FILE *R1_stream = fopen(R1_in, "r");
FILE *R1_out_stream = fopen(R1_out,"w");
/*****************loading filters*******************/
int nrows = get_row(filter);
FILE *filter_stream = fopen(filter, "r");
char **filter_list = (char **)malloc(nrows * sizeof(*filter_list));
for(int i = 0; i <nrows; i++)
{
filter_list[i] = malloc(LL * sizeof(char));
fgets(filter_list[i], LL, filter_stream);
}
fclose(filter_stream);
/*****************filtering*******************/
while (fgets(R1_line, LL, R1_stream))
{
// printf("%s", R1_line);
for(int i = 0; i<nrows; i++)
{
if(strcmp(R1_line, filter_list[i])==0)
{
fprintf(R1_out_stream, "%s", R1_line);
break;
}
}
}
printf("\n");
for(int i=0; i<nrows; i++)
{
free(filter_list[i]);
}
free(filter_list);
fclose(R1_stream);
fclose(R1_out_stream);
}
// int main()
// {
// char R1_in[] = "I1.txt";
// char R1_out[] = "I1.out.txt";
//
// char filters[] = "filter.txt";
//
// filter(R1_in, R1_out, filters);
// return 0;
// }
I1.txt
aa
baddf
ca
daa
filter.txt
ca
cb
Expected Output I1.out.txt
ca
I had never used R before. But, I was a bit intrigued. So, I installed R and did a little research.
Everything in R [using the .C interface] is passed to the C function as a pointer.
From: https://www.r-bloggers.com/2014/02/three-ways-to-call-cc-from-r/ we have:
Inside a running R session, the .C interface allows objects to be directly accessed in an R session’s active memory. Thus, to write a compatible C function, all arguments must be pointers. No matter the nature of your function’s return value, it too must be handled using pointers. The C function you will write is effectively a subroutine.
So, if we pass an integer, the C function argument must be:
int *
I took a guess that:
char *
Needed to be:
char **
And, then tested it with:
#include <stdio.h>
#define SHOW(_sym) \
show(#_sym,_sym)
static void
show(const char *sym,char **ptr)
{
char *str;
printf("%s: ptr=%p",sym,ptr);
str = *ptr;
printf(" str=%p",str);
printf(" '%s'\n",str);
}
void
filter(char **R1_in,char **R1_out,char **filt)
{
SHOW(R1_in);
SHOW(R1_out);
SHOW(filt);
}
Here is the output:
> dyn.load("filter.so");
> .C("filter",
+ as.character("abc"),
+ as.character("def"),
+ as.character("ghi"))
R1_in: ptr=0x55a9f8cb1798 str=0x55a9f9de9760 'abc'
R1_out: ptr=0x55a9f8cb1818 str=0x55a9f9de9728 'def'
filt: ptr=0x55a9f8cb1898 str=0x55a9f9de96f0 'ghi'
[[1]]
[1] "abc"
[[2]]
[1] "def"
[[3]]
[1] "ghi"
> q()
So, you want:
void
filter(char **R1_in, char **R1_out, char **filt)
{
FILE *R1_stream = fopen(*R1_in, "r");
// ...
}
Related
I basically wrote a code in which I take two command line arguments one being the type of file that I want to search in my directory and they other being the amount I want(which is not implemented yet, but I can fix that)
The code is like so:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#define sizeFileName 500
#define filesMax 5000
int cmpfunc( const void *a, const void *b) {
return *(char*)a + *(char*)b;
}
int main( int argc, char ** argv) {
FILE * fp = popen( "find . -type f", "r");
char * type = argv[1];
char * extension = ".";
char* tExtension;
tExtension = malloc(strlen(type)+1+4);
strcpy(tExtension, extension);
strcat(tExtension, type);
// printf("%s\n",tExtension);
int amount = atoi(argv[2]);
//printf("%d\n",amount);
char buff[sizeFileName];
int nFiles = 0;
char * files[filesMax];
while(fgets(buff,sizeFileName,fp)) {
int leng = strlen(buff) - 1;
if (strncmp(buff + leng - 4, tExtension, 4) == 0){
files[nFiles] = strndup(buff,leng);
//printf("\t%s\n", files[nFiles]);
nFiles ++;
}
}
fclose(fp);
printf("Found %d files\n", nFiles);
long long totalBytes = 0;
struct stat st;
// sorting based on byte size from greatest to least
qsort(files, (size_t) strlen(files), (size_t) sizeof(char), cmpfunc);
for(int i = 0;i< nFiles; i ++) {
if(0!= stat(files[i],&st)){
perror("stat failed:");
exit(-1);
}
totalBytes += st.st_size;
printf("%s : %ld\n",files[i],st.st_size);
}
printf("Total size: %lld\n", totalBytes);
// clean up
for(int i = 0; i < nFiles ; i ++ ) {
free(files[i]);
}
return 0;
}
So far I have every section set up properly, upon running the code say $./find ini 5, it would print out all the ini files followed by their byte size(it's currently ignore the 5). However, for the qsort(), I'm not exactly sure how I would sort the contents of char * files as while it holds the pathnames, I had to use stat to get the byte sizes, how would I print out a sorted version of my print statements featuring the first statement being the most bytes and finishes at the least bytes?
If we suppose your input is valid, your question could be simplified with:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define filesMax 5000
int cmpfunc(const void const *a, const void *b) { return *(char *)a + *(char *)b; }
int main(void) {
int nFiles = 4;
char *files[filesMax] = {"amazing", "hello", "this is a file", "I'm a bad file"};
qsort(files, strlen(files), sizeof(char), cmpfunc);
for (int i = 0; i < nFiles;; i++) {
printf("%s\n", files[i]);
}
}
If you compile with warning that give you:
source_file.c:11:23: warning: incompatible pointer types passing 'char *[5000]' to parameter of type 'const char *' [-Wincompatible-pointer-types]
qsort(files, strlen(files), sizeof(char), cmpfunc);
^~~~~
qsort() expect the size of your array (or in your case a subsize) and it's also expect the size of one element of your array. In both you wrongly give it to it. Also, your compare function doesn't compare anything, you are currently adding the first bytes of both pointer of char, that doesn't make a lot of sense.
To fix your code you must write:
qsort(files, nFiles, sizeof *files, &cmpfunc);
and also fix your compare function:
int cmpfunc_aux(char * const *a, char * const *b) { return strcmp(*a, *b); }
int cmpfunc(void const *a, void const *b) { return cmpfunc_aux(a, b); }
also size should be of type size_t:
size_t nFiles = 0;
Don't forget that all informations about how to use a function are write in their doc.
how would I print out a sorted version of my print statements featuring the first statement being the most bytes and finishes at the least bytes?
Your code don't show any clue that your are trying to do that, you are currently storing name file and only that. How do you expect sort your file with an information you didn't acquired ?
However, that simple create a struct that contain both file name and size, acquire information needed to sort it and sort it:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <inttypes.h>
struct file {
off_t size;
char *name;
};
int cmpfunc_aux(struct file const *a, struct file const *b) {
if (a->size > b->size) {
return -1;
} else if (a->size < b->size) {
return 1;
} else {
return 0;
}
}
int cmpfunc(void const *a, void const *b) { return cmpfunc_aux(a, b); }
#define filesMax 5000
int main(void) {
size_t nFiles = 4;
struct file files[filesMax] = {{42, "amazing"},
{21, "hello"},
{168, "this is a file"},
{84, "I'm a bad file"}};
qsort(files, nFiles, sizeof *files, &cmpfunc);
for (size_t i = 0; i < nFiles; i++) {
printf("%s, %" PRId64 "\n", files[i].name, (intmax_t)files[i].size);
}
}
The function cmpfunc() provided adds the first character of each string, and that's not a proper comparison function (it should give a opposite sign value when you switch the parameters, e.g. if "a" and "b" are the strings to compare, it adds the first two characters of both strings, giving 97+98 == 195, which is positive on unsigned chars, then calling with "b" and "a" should give a negative number (and it again gives you 98 + 97 == 195), more on, it always gives the same result ---even with signed chars--- so it cannot be used as a sorting comparator)
As you are comparing strings, why not to use the standard library function strcmp(3) which is a valid comparison function? It gives a negative number if first string is less lexicographically than the second, 0 if both are equal, and positive if first is greater lexicographically than the second.
if your function has to check (and sort) by the lenght of the filenames, then you can define it as:
int cmpfunc(char *a, char *b) /* yes, you can define parameters as char * */
{
return strlen(a) - strlen(b);
}
or, first based on file length, then lexicographically:
int cmpfunc(char *a, char *b)
{
int la = strlen(a), lb = strlen(b);
if (la != lb) return la - lb;
/* la == lb, so we must check lexicographycally */
return strcmp(a, b);
}
Now, to continue helping you, I need to know why do you need to sort anything, as you say that you want to search a directory for a file, where does the sorting take place in the problem?
I have recently started programming in UNIX environment. I need to write a program which creates an empty file with name and size given in the terminal using this commands
gcc foo.c -o foo.o
./foo.o result.txt 1000
Here result.txt means the name of the newly created file, and 1000 means the size of the file in bytes.
I know for sure that lseek function moves the file offset, but the trouble is that whenever I run the program it creates a file with a given name, however the size of the file is 0.
Here is the code of my small program.
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
int main(int argc, char **argv)
{
int fd;
char *file_name;
off_t bytes;
mode_t mode;
if (argc < 3)
{
perror("There is not enough command-line arguments.");
//return 1;
}
file_name = argv[1];
bytes = atoi(argv[2]);
mode = S_IWUSR | S_IWGRP | S_IWOTH;
if ((fd = creat(file_name, mode)) < 0)
{
perror("File creation error.");
//return 1;
}
if (lseek(fd, bytes, SEEK_SET) == -1)
{
perror("Lseek function error.");
//return 1;
}
close(fd);
return 0;
}
If you aren't allowed to use any other functions to assist in creating a "blank" text file, why not change your file mode on creat() then loop-and-write:
int fd = creat(file_name, 0666);
for (int i=0; i < bytes; i++) {
int wbytes = write(fd, " ", 1);
if (wbytes < 0) {
perror("write error")
return 1;
}
}
You'll want to have some additional checks here but, that would be the general idea.
I don't know whats acceptable in your situation but, possibly adding just the write() call after lseek() even:
// XXX edit to include write
if ((fd = creat(file_name, 0666)) < 0) {
perror("File creation error");
//return 1;
}
// XXX seek to bytes - 1
if (lseek(fd, bytes - 1, SEEK_SET) == -1) {
perror("lseek() error");
//return 1;
}
// add this call to write a single byte # position set by lseek
if (write(fd, " ", 1) == -1) {
perror("write() error");
//return 1;
}
close(fd);
return 0;
I am writing a data logger and would like to keep the files limited to a specific number of entries. I am trying to write this bit of code in the setup, so that when the Arduino powers on, it will write to a new file just to keep things simple. However, when I try to open the file I can't, although I am not sure why. Can anyone offer any explanation?
char *fileName; //global name
File logFile; //global file
//everything else is in setup()
char * topPart = "/Data/Data"; //first part of every file
char * lowerPart = ".txt"; // jus the extention
char * itter; //used to hold the char of i later
fileName = "/Data/Data.txt"; //start with the first file possible.
for(int i=0; i<=100;i++) {
if(!SD.exists(fileName)) {
Serial.print("opening file: ");
Serial.print(fileName);
logFile = SD.open(fileName, FILE_WRITE);
if(logFile) {
logFile.println("I made it");
Serial.println("in the file");
}
if(!logFile) {
Serial.println("somthing bad");
}
break;
} else {
itter = (char *)(i+48);
strcpy(fileName,topPart);
strcpy(fileName,itter);
strcpy(fileName,lowerPart);
Serial.println(i);
}
}
Lots of problems.
the construction of itter is wrong.
strcpy doesn't append just cpy.
Here is a code example to build your filename. This a basic C program. Remove the #include and main for Arduino, this allows to test on your computer whether the program is ok.
#include <string.h>
#define TOPPART "/Data/Data"
#define LOWERPART ".txt"
int main(void) {
char buf[64];
snprintf(buf, sizeof(buf), "%s%s", TOPPART, LOWERPART);
for (int i = 0; i < 100; i++) {
/* here your stuff to check if the filename froml buf exists*/
snprintf(buf, sizeof(buf), "%s%d%s", TOPPART, i, LOWERPART);
}
return 0;
}
Could anyone give some simple examples (function names are good) for reading text files line by line (binary is OK if text is really hard) in a FreeBSD kernel module, from a given directory?
Really appreciate your kind help.
Here's a sample kernel module that'll cat your /etc/motd on load:
// kernel module motd catter.
// Doug Luce doug#forephypodia.con.com
#include <sys/param.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
static int catfile(const char *filename) {
struct sbuf *sb;
static char buf[128];
struct nameidata nd;
off_t ofs;
ssize_t resid;
int error, flags, len;
NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename, curthread);
flags = FREAD;
error = vn_open(&nd, &flags, 0, NULL);
if (error)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
ofs = 0;
len = sizeof(buf) - 1;
sb = sbuf_new_auto();
while (1) {
error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs,
UIO_SYSSPACE, IO_NODELOCKED, curthread->td_ucred,
NOCRED, &resid, curthread);
if (error)
break;
if (resid == len)
break;
buf[len - resid] = 0;
sbuf_printf(sb, "%s", buf);
ofs += len - resid;
}
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, curthread->td_ucred, curthread);
uprintf("%s", sbuf_data(sb));
return 0;
}
static int EventHandler(struct module *inModule, int inEvent, void *inArg) {
switch (inEvent) {
case MOD_LOAD:
uprintf("MOTD module loading.\n");
if (catfile("/etc/motd") != 0)
uprintf("Error reading MOTD.\n");
return 0;
case MOD_UNLOAD:
uprintf("MOTD module unloading.\n");
return 0;
default:
return EOPNOTSUPP;
}
}
static moduledata_t moduleData = {
"motd_kmod",
EventHandler,
NULL
};
DECLARE_MODULE(motd_kmod, moduleData, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
This was cobbled together mostly from bits of https://svnweb.freebsd.org/base/release/10.1.0/sys/kern/vfs_mountroot.c?revision=274417&view=markup
There's no nice scanning/parsing facilities native kernel-side, so
that's usually done the hard way.
This code:
#include <stdlib.h>
#include <stdio.h>
int j_btree_create (int fn_initial_nodes);
typedef struct {
int depth;
int value;
void *item;
void *left_pointer;
void *right_pointer;
} j_btree_node_int;
typedef struct {
int nodes;
int available_nodes;
int btree_extension;
} j_btree_descriptor_int;
int j_btree_create (int fn_initial_nodes) {
int *free_btree_node;
int loop_counter;
j_btree_descriptor_int *btree_start;
btree_start = (j_btree_descriptor_int *) malloc (((sizeof(j_btree_node_int) + sizeof(free_btree_node)) * fn_initial_nodes) + sizeof(j_btree_descriptor_int));
printf ("btree_start: " . btree_start);
/* *btree_start.nodes = fn_initial_nodes;
*btree_start.available_nodes = fn_initial_nodes;
*btree_start.extension = NULL; */
for (loop_counter = 0; loop_counter < fn_initial_nodes; loop_counter++) {
printf ("loop_test:" . loop_counter);
}
}
Produces this error:
/home/jamie/aws/btree_int.c||In function ‘j_btree_create’:|
/home/jamie/aws/btree_int.c|28|error: request for member ‘btree_start’ in something not a structure or union|
/home/jamie/aws/btree_int.c|33|error: request for member ‘loop_counter’ in something not a structure or union|
||=== Build finished: 2 errors, 0 warnings ===|
When compiled with CodeBlocks. I have not managed to find an exact answer to my problem (I have looked), does anyone know roughly what I am doing wrong? Probably more than one thing given I am fairly new to C.
printf ("btree_start: " . btree_start);
This is not how the things are done in c. There's no . concatenation operator and you do not concatenate strings (pointers to characters) and pointers to structures. If you want to print out the pointer, it's
printf("btree_start: %p\n",btree_start);
For the loop counter it's
printf("loop_test: %d",loop_counter);