I have an unsigned char array unsigned char* name = malloc(nameLength); - how can I print it with printf? %sdoes not seem to work correctly, neither does %u (seeing random icons).
Here's how I create the data I want to print:
__int32 nameLength;
ReadProcessMemory(hProcess, (LPCVOID)(classNamePtr + 0x0004), &nameLength, sizeof(__int32), 0); //Reads nameLength to be 13 in this case
unsigned char* name = malloc(nameLength+5); //Add 5 for good measure, it is null terminated
ReadProcessMemory(hProcess, (LPCVOID)(nameStrPtr), name, nameLength, 0);
name[nameLength] = 0; //null terminate
printf("%s", name); //Outputs single character strange characters, like an up icon
When one detects a non-printable char, output an escape sequence or hexadecimal value
#include <ctype.h>
#include <string.h>
#include <stdio.h>
int printf_ByteArray(const unsigned char *data, size_t len) {
size_t i;
int result = 0;
for (i = 0; i < len; i++) {
int y;
int ch = data[i];
static const char escapec[] = "\a\b\t\n\v\f\n\'\"\?\\";
char *p = strchr(escapec, ch);
if (p && ch) {
static const char escapev[] = "abtnvfn\'\"\?\\";
y = printf("\\%c", escapev[p - escapec]);
} else if (isprint(ch)) {
y = printf("%c", ch);
} else {
// If at end of array, assume _next_ potential character is a '0'.
int nch = i >= (len - 1) ? '0' : data[i + 1];
if (ch < 8 && (nch < '0' || nch > '7')) {
y = printf("\\%o", ch);
} else if (!isxdigit(nch)) {
y = printf("\\x%X", ch);
} else {
y = printf("\\o%03o", ch);
}
}
if (y == EOF)
return EOF;
result += y;
}
return result;
}
If data contained one of each byte, sample follows:
\0...\6\a\b\t\n\v\f\xD\xE\xF\x10...\x1F !\"#$%&\'()*+,-./0123456789:;<=>\?#ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F...\xFE\o377
The selection of escape sequences will vary with code goals. The set above attempts to conform to something a C parser would accept.
Note: With the last else, always outputting a 3-digit octal sequence has scanning advantages, but folks are more accustomed to hexadecimal than octal.
Adjusted to conditionally print in hex depending on the following character.
Related
I am currently trying to parse UnicodeData.txt with this format: ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html However, I am hitting a problem in that when I try to read, say a line like the following.
something;123D;;LINE TABULATION;
I try to get the data from the fields by code such as the following. The problem is that fields[3] is not getting filled in, and scanf is returning 2. in is the current line.
char fields[4][256];
sscanf(in, "%[^;];%[^;];%[^;];%[^;];%[^;];",
fields[0], fields[1], fields[2], fields[3]);
I know this is the correct implementation of scanf(), but is there a way to get this to work, short of making my own scanf()?
scanf does not handle "empty" fields. So you will have to parse it on your own.
The following solution is:
fast, as it uses strchr rather than the quite slow sscanf
flexible, as it will detect an arbitrary number of fields, up to a given maximum.
The function parse extracts fields from the input str, separated by semi-colons. Four semi-colons give five fields, some or all of which can be blank. No provision is made for escaping the semi-colons.
#include <stdio.h>
#include <string.h>
static int parse(char *str, char *out[], int max_num) {
int num = 0;
out[num++] = str;
while (num < max_num && str && (str = strchr(str, ';'))) {
*str = 0; // nul-terminate previous field
out[num++] = ++str; // save start of next field
}
return num;
}
int main(void) {
char test[] = "something;123D;;LINE TABULATION;";
char *field[99];
int num = parse(test, field, 99);
int i;
for (i = 0; i < num; i++)
printf("[%s]", field[i]);
printf("\n");
return 0;
}
The output of this test program is:
[something][123D][][LINE TABULATION][]
Update: A slightly shorter version, which doesn't require an extra array to store the start of each substring, is:
#include <stdio.h>
#include <string.h>
static int replaceSemicolonsWithNuls(char *p) {
int num = 0;
while ((p = strchr(p, ';'))) {
*p++ = 0;
num++;
}
return num;
}
int main(void) {
char test[] = "something;123D;;LINE TABULATION;";
int num = replaceSemicolonsWithNuls(test);
int i;
char *p = test;
for (i = 0; i < num; i++, p += strlen(p) + 1)
printf("[%s]", p);
printf("\n");
return 0;
}
Just in case you would like to consider this following alternative, using scanfs and "%n" format-specifier, used for reading in how many characters have been read by far, into an integer:
#include <stdio.h>
#define N 4
int main( ){
char * str = "something;123D;;LINE TABULATION;";
char * wanderer = str;
char fields[N][256] = { 0 };
int n;
for ( int i = 0; i < N; i++ ) {
n = 0;
printf( "%d ", sscanf( wanderer, "%255[^;]%n", fields[i], &n ) );
wanderer += n + 1;
}
putchar( 10 );
for ( int i = 0; i < N; i++ )
printf( "%d: %s\n", i, fields[i] );
getchar( );
return 0;
}
On every cycle, it reads maximum of 255 characters into the corresponding fields[i], until it encounters a delimiter semicolon ;. After reading them, it reads in how many characters it had read, into the n, which had been zeroed (oh my...) beforehand.
It increases the pointer that points to the string by the amount of characters read, plus one for the delimiter semicolon.
printf for the return value of sscanf, and the printing of the result is just for demonstration purposes. You can see the code working on http://codepad.org/kae8smPF without the getchar(); and with for declaration moved outside for C90 compliance.
I don't think sscanf will do what you need: sscanf format %[^;] will match a non-empty sequence of not-semicolon characters. The alternative would be using readline with the separator being ';', like:
#include <iostream>
#include <sstream>
#include <string>
int main() {
using namespace std;
istringstream i { "something;123D;;LINE TABULATION;\nsomething;123D;;LINE TABULATION;\nsomething;123D;;LINE TABULATION;\n" };
string a, b, c, d, newline;
while( getline(i, a, ';') && getline(i, b, ';') && getline(i, c, ';') && getline (i, d, ';') && getline(i, newline) )
cout << d << ',' << c << '-' << b << ':' << a << endl;
}
(I have only seen you took the c++ tag off this question now, if your problem is c-only, I have another solution, below:)
#include <string.h>
#include <stdio.h>
int main() {
typedef char buffer[2048];
buffer line;
while( fgets(line, sizeof(line), stdin) > 0 ) {
printf("(%s)\n", line);
char *end = line;
char *s1 = *end == ';' ? (*end = '\0'), end++ : strtok_r(end, ";", &end);
char *s2 = *end == ';' ? (*end = '\0'), end++ : strtok_r(end, ";", &end);
char *s3 = *end == ';' ? (*end = '\0'), end++ : strtok_r(end, ";", &end);
char *s4 = *end == ';' ? (*end = '\0'), end++ : strtok_r(end, ";", &end);
printf("[%s][%s][%s][%s]\n", s4, s3, s2, s1);
}
}
While searching for the meaning of the statement of K&R C Exercise 7-2 I foun this answer to the K&R C Exercise 7.2 on https://clc-wiki.net/wiki/K%26R2_solutions:Chapter_7:Exercise_2
The Exercise asks to
Write a program that will print arbitrary input in a sensible way. As a minimum, it should print non-graphic characters in octal or hexadecimal according to local custom, and break long text lines.
Also I am unable to understand the meaning of sentence of the exercise, specifically the part "As a minimum, it should print non-graphic characters in octal or hexadecimal according to local custom".
What does this exercise 7-2 asks for? Please explain the meaning of the statement of exercise.
The code below uses format specifiers "%02x " and "%3o " are used at the end of the code to print non-printable characters but what exactly this format specifiers do to Non-Printables?
else
{
if(textrun > 0 || binaryrun + width >= split)
{
printf("\nBinary stream: ");
textrun = 0;
binaryrun = 15;
}
printf(format, ch);
binaryrun += width;
}
Rest of the code break long lines into smaller ones and print all printable characters as it is.
The Complete Program is as below:
#include <stdio.h>
#define OCTAL 8
#define HEXADECIMAL 16
void ProcessArgs(int argc, char *argv[], int *output)
{
int i = 0;
while(argc > 1)
{
--argc;
if(argv[argc][0] == '-')
{
i = 1;
while(argv[argc][i] != '\0')
{
if(argv[argc][i] == 'o')
{
*output = OCTAL;
}
else if(argv[argc][i] == 'x')
{
*output = HEXADECIMAL;
}
else
{
/* Quietly ignore unknown switches, because we don't want to
* interfere with the program's output. Later on in the
* chapter, the delights of fprintf(stderr, "yadayadayada\n")
* are revealed, just too late for this exercise.
*/
}
++i;
}
}
}
}
int can_print(int ch)
{
char *printable = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890 !\"#%&'()*+,-./:;<=>?[\\]^_{|}~\t\f\v\r\n";
char *s;
int found = 0;
for(s = printable; !found && *s; s++)
{
if(*s == ch)
{
found = 1;
}
}
return found;
}
int main(int argc, char *argv[])
{
int split = 80;
int output = HEXADECIMAL;
int ch;
int textrun = 0;
int binaryrun = 0;
char *format;
int width = 0;
ProcessArgs(argc, argv, &output);
if(output == HEXADECIMAL)
{
format = "%02X ";
width = 4;
}
else
{
format = "%3o ";
width = 4;
}
while((ch = getchar()) != EOF)
{
if(can_print(ch))
{
if(binaryrun > 0)
{
putchar('\n');
binaryrun = 0;
textrun = 0;
}
putchar(ch);
++textrun;
if(ch == '\n')
{
textrun = 0;
}
if(textrun == split)
{
putchar('\n');
textrun = 0;
}
}
else
{
if(textrun > 0 || binaryrun + width >= split)
{
printf("\nBinary stream: ");
textrun = 0;
binaryrun = 15;
}
printf(format, ch);
binaryrun += width;
}
}
putchar('\n');
return 0;
}
You have found by yourself what "%02x " and "%03o " mean. That is good!
So your question boils down to "What are non-printable characters?" and "How are they printed with the mentioned formats?"
A non-printable character is defined (in the source shown) by the string printable in function can_print(). All characters not in this string are deliberately defined to be non-printable. We can reason about the selection, but this is out of scope here. Another note: " " and "\t\f\v\r\n" are in this set of printable characters and have a value of <= 0x20 in ASCII.
BTW, the standard library has isprint() that checks for printability.
As you seem to know each character is encoded as an assigned value. This value can be interpreted as you like, as a character, as a number, as an instruction, as a colour, as a bit pattern, anything. Actually all digital computers are just working on bit patterns, it's up to the interpretation what they mean.
So a non-printable character can be interpreted as an int number, and this is what happens by printf() with the mentioned format. Let's say that the character read is '\b', known as backspace. (Note: It is not in printable.) In ASCII this character is encoded as 0x08. So the output will be "08 " and "010 ", respectively.
You might like to change the program in such way that all characters are considered non-printable. Then you'll see all characters output as hex or octal.
So I tasked myself to write a function, that:
overwrites an int with a safe value (not return gibberish if the
user decides to input char-s or anything bigger by absolute value
than (2^31-1)
if input exceeds (2^31 - 1) (meaning if the user inputs 8 or more
digits) the int must be overwritten with the upper value
Here is the code:
void getSafeIntWithBoundaries(int *dest, int lo, int hi, const char *message);
bool anyChars(const char *input, int len);
int main() {
int x;
getSafeIntWithBoundaries(&x, 1, 10, "Enter an integer between 0 and 10.");
printf("x = %d\n", x);
return 0;
}
void getSafeIntWithBoundaries(int * dest, int lo, int hi, const char * message) {
char input[33];
while (1) {
puts(message);
fgets(input, 33, stdin);
int len = strlen(input);
if (input[len - 1] == '\n') { input[len - 1] = '\0'; }
--len;
if (bool reset = anyChars(input, len)) {
puts("Try again.");
continue;
}
else {
int ret;
if (strcmp("2147483648", input) < 0) {
*dest = hi;
return;
}
sscanf(input, "%d", &ret);
ret = ret > hi ? hi : ret;
ret = ret < lo ? lo : ret;
*dest = ret;
break;
}
}
}
bool anyChars(const char * input, int len) {
for(int i = 0; i < len; i++) {
if (!isdigit(input[i])) {
return true;
}
}
return false;
}
A few more notes:
in getSafeIntWithBoundaries(...) I'm getting rid of the '\n', I'm
changing it for a '\0', respectively decreasing int len; which holds
the length of the input.
anyChars() checks whether the input contains any non digit char. If
it does, then the user has to re-enter. One of the problems is
however that in case of failure, message needs to be printed out only
once. If I input something ridiculously long, message will be printed
multiple times. I don't know how to fix this.
the strcmp() bit checks if the user entered a number bigger than
(2^31 - 1). If the user has, then the int must be overwritten with
the high value and the function needs to end. Problem is however, if
the user enters a very long number, the target int will be
overwritten with the low boundary. I don't know how to fix that
either.
2 ?s making sure the target int won't exceed its boundaries. I marked
the parts that I can't figure out with bold, essentially that's the
whole question.
Suggestions on improving the code are welcomed as well.
Suggestions on improving the code are welcomed
Code fails many cases
Overflow UB
When the range exceed int, sscanf(input, "%d", &ret) is undefined behavior.
Long lines not consumed
When input is more than 32 characters (including the '\n), left over input remains.
Null character input
Input starting with a null character '\0' lead to undefined behavior with input[len - 1]
Non ASCII input
isdigit(input[i]) is undefined behavior when input[i] < 0.
Assumed ranged
Code uses int assuming it covers the range 2^31 - 1. C requires int to have a
minimum range of [-32,767 ... 32,767].
Unclear goals
"if input exceeds (2^31 - 1) (meaning if the user inputs 8 or more digits)" --> What if input is `"0000000000000000000000000000000000001\n"? 35 zeros? It is in range yet exceeds 8 digits and exceed 33 character buffer.
End-of-file
puts("Try again."); does not make sense if input is closed. I'd expect int getSafeIntWithBoundaries() to return 1 on success, 0 on failure, EOF on end-of-file/input error.
Below is some untested code - will test later. I'll work on the message details later. It is certainty more than what one might think is needed to simply read an `int, but if you want robust code, it is work.
To read an entire line of input obliges reading until '\n' or EOF.
I'd tolerate leading and trailing spaces.
strtol() is good , but then the entire line needs to be read first. Recall valid input can have many leading spaces or zeros.
Do not overflow intmath- it is UB. Summing the value with negativesint` has greater range than the positive side.
Pre-C99 /,% has implementation defined behavior when the remainder is non-zero - so I avoided that.
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#define INT_MIN_LS_DIGIT ((-(INT_MIN + 10)) % 10)
#define INT_MIN_DIV_10 ((INT_MIN + INT_MIN_LS_DIGIT)/10)
int getSafeIntWithBoundaries(int * dest, int lo, int hi, const char *message) {
fputs(message, stdout);
fflush(stdout); // Insure data to sent out completely
int ch;
while (isspace((ch = fgetc(stdin))) && (ch != '\n')) {
;
}
bool positive = true;
if (ch == '-' || ch == '+') {
positive = ch == '+';
ch = fgetc(stdin);
}
bool digit_found = false;
bool overflow = false;
int sum = 0;
while (isdigit(ch)) {
digit_found = true;
int digit = ch = '0';
// Detect possible overflow
if (sum <= INT_MIN_DIV_10
&& (sum < INT_MIN_DIV_10 || digit > INT_MIN_LS_DIGIT)) {
sum = INT_MIN;
overflow = true;
} else {
sum = sum * 10 - digit;
}
}
if (positive) {
if (sum < -INT_MAX) {
sum = INT_MAX;
overflow = true;
} else {
sum = -sum;
}
}
if (sum > hi) {
sum = hi;
overflow = true;
}
if (sum < lo) {
sum = lo;
overflow = true;
}
*dest = sum;
while (isspace(ch) && ch != '\n') {
ch = fgetc(stdin);
}
if (ch == EOF && iserror(stdin)) {
return EOF; // Rare input error detected
}
if (!digit_found) {
return 1; // or a "No digit found" error code
}
if (overflow) {
errno = ERANGE;
return 1; // or a "Overflow" error code
}
if (ch != '\n' && ch != EOF) {
return 1; // or a "Extra trailing junk" error code
}
return 0;
}
strtol could be used to parse an integer from a string. It provides for overflow and the pointer to the last character allows for testing for valid terminating characters. This set the range to 0 and INT_MAX but any range from INT_MIN to INT_MAX could be used. The terminating character is nul but could be comma, semicolon or any appropriate character.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
//inputs
// char *line : pointer to text to be parsed
// char **next : pointer to pointer to allow modification of caller's pointer
// char *term : pointer to characters to be considered terminators
// int *value : pointer to int to allow modification of caller's int
// int min : minimum value of range
// int max : maximum value of range
// returns : 0 failure or 1 success
int get_int_range ( char *line, char **next, char *delim, int *value, int min, int max)
{
long int input = 0;
char *end = NULL;//will point to end of parsed value
if ( line == NULL) {
return 0;
}
errno = 0;
input = strtol ( line, &end, 10);//get the integer from the line. end will point to the end of the parsed value
if ( end == line) {// nothing was parsed. no digits
printf ( "input [%s] MUST be a number\n", line);
return 0;// return failure
}
// *end is the character that end points to
if ( *end != '\0' && !( delim && strchr ( delim, *end))) {// is *end '\0' or is *end in the set of term characters
printf ( "problem with input: [%s] \n", line);
return 0;
}
if ( ( errno == ERANGE && ( input == LONG_MAX || input == LONG_MIN))
|| ( errno != 0 && input == 0)){// parsing error from strtol
perror ( "input");
return 0;
}
if ( input < min || input > max) {// parsed value is outside of range
printf ( "input out of range %d to %d\n", min, max);
return 0;
}
if ( next != NULL) {// if next is NULL, caller did not want pointer to end of parsed value
*next = end;// *next allows modification to caller's pointer
}
if ( value == NULL) {
return 0;
}
*value = input;// *value allows modification to callers int
return 1;// success
}
int main( int argc, char *argv[])
{
char line[900] = {'\0'};
int valid = 0;
int number = 0;
do {
printf ( "Enter number or enter quit\n");
fgets ( line, sizeof ( line), stdin);//read a line
if ( strcmp ( line, "quit\n") == 0) {
return 1;// if quit is entered, exit the program
}
line[strcspn ( line, "\n")] = '\0';//remove trailing newline
valid = get_int_range ( line, NULL, "", &number, 0, INT_MAX);// call to parse a value
} while ( !valid);// on failure, keep looping the above
printf ( "input is %d\n", number);
return 0;
}
Reposting because my first post was no good. I have a question that I'm not really sure how to do. I know the process I'm going for, but am not totally sure how to scan a string into an array so that each character/integer is scanned into a independent element of the array. I'll post the question and the code I have so far, and any help would be appreciated.
Question:
Assume that we have a pattern like the following: ([n][letter])+ in which n is an integer number and letter is one of the lowercase letters from a-z. For example, 2a and 3b are valid expressions based on our pattern. Also, “+” at the end of the pattern means that we have at least one expression (string) or more than one expression attached. For instance, 2a4b is another valid expression which is matched with the pattern. In this question, we want to convert these valid expressions to a string in which letters are repeated n times.
o Read an expression (string) from user and print the converted version of the expression in the output.
o Check if input expression is valid. For example, 2ab is not a valid expression. If the expression is not valid, print “Invalid” in the output and ask user to enteranother expression.
o Sample input1 = “2a”, output = aa
o Sample input2 = “2a3b”, output = aabbb
o You will receive extra credit if you briefly explain what concept or theory you can use to check whether an expression is valid or not.
What I have so far:
#include <stdio.h>
int main()
{
int size, i, j;
char pattern[20];
char vowel[20];
int count[20];
printf("Please enter your string: ");
gets(pattern);
size = strlen(pattern);
for(i=0; i<size; i++)
if((i+1)%2 == 0)
vowel[i] = pattern[i];
else if((i+1)%2 != 0)
count[i] = pattern[i];
for(i=0; i<size/2; i++);
for(j=0; j<count[i]; j++)
printf("%s", vowel[i]);
}
I assumed you want to write the "invalid\n" string on stderr. If not just change the file descriptor given to write.
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#define MAX_INPUT_SIZE 20
int
check_input(char *input)
{
while (*input)
{
if (*input < '0' || *input > '9')
{
write(2, "invalid\n", 8);
return 1;
}
while (*input >= '0' && *input <= '9')
input++;
if (*input < 'a' || *input > 'z')
{
write(2, "invalid\n", 8);
return 1;
}
input++;
}
return 0;
}
void
print_output(char *input)
{
int i;
while (*input)
{
i = atoi(input);
while (*input >= '0' && *input <= '9')
input++;
for (; i > 0; i--)
write(1, input, 1);
input++;
}
write(1, "\n", 1);
}
int
main()
{
char input[MAX_INPUT_SIZE];
do
{
printf("Please enter your string: ");
fgets(input, MAX_INPUT_SIZE, stdin);
input[strlen(input) - 1] = '\0';
}
while (check_input(input));
print_output(input);
return 0;
}
The steps are:
Read pattern
Check if pattern is valid
Generate output
Since the input length is not specified you have to assume a maximum length.
Another assumption is n is a single digit number.
Now you may read the whole expression with fgets() or read it char by char.
The latter allows you to check for validity as you read.
Lets use fgets() for convenience and in case the expression needs to be stored for later use.
char exp[100]; // assuming at most 50 instances of ([n][letter])
int len;
printf("Input: ");
fgets(exp, 100, stdin);
len = strlen(exp) - 1; // Discard newline at end
An empty input is invalid. Also a valid expression length should be even.
if (len == 0 || len%2 != 0) {
printf("Invalid-len\n");
return 1;
}
Now parse the expression and separately store numbers and letters in two arrays.
char nums[50], letters[50];
invalid = 0;
for (i = 0, j = 0; i < len; i += 2, j++) {
if (exp[i] >= '1' && exp[i] <= '9') {
nums[j] = exp[i] - '0';
} else {
invalid = 1;
break;
}
if (exp[i+1] >= 'a' && exp[i+1] <= 'z') {
letters[j] = exp[i+1];
} else {
invalid = 1;
break;
}
}
Notice that in each iteration if first char is not a number or second char is not a letter, then the expression is considered to be invalid.
If the expression is found to be invalid, nothing to do.
if (invalid) {
printf("Invalid\n");
return 1;
}
For a valid expression run nested loops to print the output.
The outer loop iterates for each ([n][letter]) pattern.
The inner loop prints n times the letter.
printf("Output: ");
for (i = 0; i < len/2; i++) {
for (j = 0; j < nums[i]; j++)
printf("%c", letters[i]);
}
This is a rather naive way to solve problems of this type. It is better to use regular expressions.
C standard library doesn't have regex support. However on Unix-like systems you can use POSIX regular expressions.
like this
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>
#define prompt "Please enter your string: "
void occurs_error(const char *src, const char *curr){
printf("\nInvalid\n");
printf("%s\n", src);
while(src++ != curr){
putchar(' ');
}
printf("^\n");
}
bool invalid(char *pattern){
char *p = pattern;
while(*p){
if(!isdigit((unsigned char)*p)){//no number
occurs_error(pattern, p);
break;
}
strtoul(p, &p, 10);
if(!*p || !islower((unsigned char)*p)){//no character or not lowercase
occurs_error(pattern, p);
break;
}
++p;
}
return *p;
}
int main(void){
char pattern[20];
while(fputs(prompt, stdout), fflush(stdout), fgets(pattern, sizeof pattern, stdin)){
pattern[strcspn(pattern, "\n")] = 0;//chomp newline
char *p = pattern;
if(invalid(p)){
continue;
}
while(*p){
int n = strtoul(p, &p, 10);
while(n--)
putchar(*p);
++p;
}
puts("");
}
}
Using only stdio.h, string.h and stdlib.h libraries how would I go about implementing this?
I'm quite new to programming so please bear with me!
Allocate a new char array of the same length as your string. Convince yourself that this is enough space. Don't forget the NUL.
Loop through the string, copying to the new string only those characters that are alphanumeric. You can't do this portably without also including <ctype.h> and using a function/macro from that header, unless you're going to enumerate all characters that you consider alphanumeric.
Again, don't forget the NUL.
Since this is homework, here is the verbal description:
Run a loop over the original string and use the functions isalnum() to determine if a character is alphanumeric. Maintain a second char array of reasonable size and every time you encounter an AlphaNum, insert it to that array. Once all AlphaNum characters have been copied to the second array, NULL terminate it and you have your string.
Note: isalnum() is defined in ctype.h, so if you aren't allowed to use that, you may have to define this function for yourself. That is another exercise of it's own.
Every char you read in your string is a byte (you can think it as a number between 0 and 255, and that's how the computers handle them) so you just need to check the ascii table to see what letter refers to.
Every alphanumerical char is in this range: [48, 58] (for numbers), or [65, 90] (upper case), or [97, 122] (lower case).
Look at this:
#include <stdio.h>
#include <stdlib.h>
#define SIZE 64
int isalphanum(char); /*states if a char is alphanumerical or not*/
char *getalphanum(char *, char*); /*modifies the second string to get the result*/
int main(void) {
char in[SIZE] = "Hello, W##########orl...,.,d!"; /*just a random to try out*/
char out[SIZE];
getalphanum(in, out);
printf("%s", out);
return 0;
}
int isalphanum(char a) {
if ((a >= 48) && (a <= 58))
return 1;
if ((a >= 65) && (a <= 90))
return 1;
if ((a >= 97) && (a <= 122))
return 1;
return 0;
}
char *getalphanum(char *s, char *t) {
if ((s == NULL) || (t == NULL)) /*tests if the strings are "handble"*/
return NULL;
int i = 0;
int j = 0;
char c;
while ((c = *(s + i)) != '\0') {
if (isalphanum(c)){
*(t + j) = c;
j++;
}
i++;
}
*(t + j) = '\0';
return t;
}
This code works and is very simple and can be improved, but there is evertything you need.
The best way is to use the isalnum() from ctype.h but now that is not an option, I have written a not-standard/non-portable function called isalnum_not_prefered() which is the equivalent of ctype.h's isalnum().
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
int isalnum_not_prefered(char s)
{
if((s >= 'A' && s <= 'Z') ||
(s >= 'a' && s <= 'z') ||
(s >= '0' && s <= '9'))
return 1;
return 0;
}
int main(void)
{
char str[] = "this!234$#&##$^is5##$a##$4677~=_?}valid2234kjstring";
int len = strlen(str);
int i, j=0;
char *newstr1 = NULL;
char *newstr2 = NULL;
if ((newstr1 = (char *) malloc(sizeof(char) * len + 1)) == NULL) {
printf("unable to allocate memory \n");
return -1;
}
for (i=0 ; i<len ; i++) {
if (isalnum(str[i])) {
newstr1[j] = str[i];
j++;
}
}
newstr1[j] = '\0';
if ((newstr2 = (char *) malloc(sizeof(char) * len + 1)) == NULL) {
printf("unable to allocate memory \n");
return -1;
}
j=0;
for (i=0 ; i<len ; i++) {
if (isalnum_not_prefered(str[i])) {
newstr2[j] = str[i];
j++;
}
}
newstr2[j] = '\0';
printf("string : %s \n", str);
printf("result1 : %s \n", newstr1);
printf("result2 : %s \n", newstr2);
free(newstr1);
free(newstr2);
return 0;
}
Points to note:
strings in C is terminated with \0. So the new string that your are populating should also terminate with \0
malloc()'ed memory must be free()'ed
malloc() errors should be handled
this code is not portable as it assumes the machines character set to be ASCII. If the hardware supports some other character set (say EBCDIC) then this may not work as expected.
Hope this helps!