ΗΥ-150 Προγραμματισμός Αλφαριθμητικά (Strings)
Χαρακτήρες Αναπαριστώνται από έναν δυαδικό αριθμό 8 δυαδικών ψηφίων (8 bits) (256 διαφορετικές τιμές) Η κωδικοποίησή τους έχει τυποποιηθεί με τον κώδικα ASCII Εκτός από γράμματα και ψηφία υπάρχουν και πολλοί ειδικοί χαρακτήρες Σταθερές χαρακτήρων Η έκφραση 'z' είναι μια σταθερά και αντιστοιχεί σε έναν int με τα 8 τελευταία του bits ίσα με τον ASCII κωδικό του χαρακτήρα 'z', '\t', '\n', κτλ
Βασικά για τα Αλφαριθμητικά (strings) Μια σειρά από χαρακτήρες που αντιμετωπίζονται σαν ένα αντικείμενο Γράμματα, αριθμοί, ειδικοί χαρακτήρες (*, /, $) και όλοι οι εκτυπώσιμοι χαρακτήρες Τιμές εισάγονται μέσα σε διπλά εισαγωγικά "Hello" Τα Strings είναι πάντα πίνακες από χαρακτήρες Ένα String είναι δείκτης στον 1ο χαρακτήρα του πίνακα Τιμή του string είναι η διεύθυνση του 1ου χαρακτήρα του πίνακα
char vs. C string 'A' has data type char and is stored in 1 byte: 5000 'A'. "A" is a C string of 1 character and is stored in 2 bytes: 6000 'A', 6001 '\0'
Διαφορά μεταξύ 'A' και "A". Το πρώτο είναι ο, ένας, χαρακτήρας A και το δεύτερο είναι το string A. Αφού τα strings είναι null-terminated, το "A" χρησιμοποιεί 2 χαρακτήρες, το 'A' και το '\0'. Το "Hello" χρησιμοποιεί 6 χαρακτήρες, 'H', 'e', 'l', 'l', 'o', και '\0'. Έστω η δήλωση: char name[16]; Αφού τα C/C++ strings είναι null-terminated και ο name έχει 16 χαρακτήρες, το μεγαλύτερο string που μπορεί να αποθηκευθεί στον name είναι ένα 15 χαρακτήρων. Εάν αποθηκεύσουμε ένα string 10 χαρακτήρων στον name, τότε μόνο τα πρώτα 11 στοιχεία του είναι σε χρήση, ενώ τα υπόλοιπα 5 όχι.
char message[8]; // declaration allocates memory Για το μεταφραστή, η τιμή του message είναι η διεύθυνση της αρχής του πίνακα. 6000 H e l l o \0 message [0] [1] [2] [3] [4] [5] [6] [7]
Character Arrays char pmessage[] = "now is the time"; /* an array */ char *amessage = "now is the time"; /* a pointer */
Δηλώσεις Δηλώσεις αλφαριθμητικών Σαν πίνακας από χαρακτήρες ή σαν δείκτης σε χαρακτήρα char * char color[] = "blue"; char *colorptr = "blue"; Κάθε string τελειώνει με '\0' και πρέπει να το λαμβάνουμε υπόψη στη δήλωση του πίνακα color has 5 elements
string[0] = J string[1] = I string[2] = b string[3] = r string[4] = a string[5] = n string[6] = string[7] = B string[8] = h string[9] = a string[10] = t string[11] = \0
Ανάγνωση Διάβασμα strings Χρήση cin cin >> word; Αντιγράφει στο word[] Αφήνουμε χώρο στον πίνακα και για το '\0'
1 2 3 #include <iostream> 4 5 int main() 6 { 7 char string1[ 20 ], string2[] = "string literal"; 8 int i; 9 10 cout << " Enter a string: "; 11 cin >> string1 ; 12 cout << "string1 is: " << string1 << endl << "string2 is: " << string2; 13 cout << "string1 with spaces between characters is: " << endl; 14 15 16 for ( i = 0; string1[ i ] != '\0'; i++ ) 17 cout << string1[ i ] << ' '; 18 19 cout << endl; 20 return 0; 21 } Enter a string: Hello there string1 is: Hello string2 is: string literal string1 with spaces between characters is: H e l l o
Πρόβλημα: Εκτυπώστε ανάστροφα το κείμενο που θα διαβαστεί από την οθόνη. #define N 100 void inversion(char *s) { int i; for (i = strlen(s)-1; i >= 0; --i) cout << s[i]; cout << endl; } int main() { char s[N]; cout << "Dwste mia le3h mexri " << N << " xarakthres" << endl; cin >> s; inversion(s); return 0; }
Ορισμός Strings Παραδείγματα: "hello", "high school", "H2O". Συνηθισμένες «πράξεις» με strings: Παράθεση: "high" + "school" = "highschool" Σύγκριση: "high" < "school" // alphabetical
Standard C library accessed by #include <string.h> #include <strings.h>
Χρήσιμες Συναρτήσεις (string.h) int strlen( const char *s ); Επιστρέφει τον αριθμό των χαρακτήρων πριν το \0 που βρίσκονται στο s (το μήκος string) char *strdup(const char *s1); Δεσμεύει όση μνήμη χρειάζεται και αντιγράφει σε αυτήν το αλφαριθμητικό στο s1. Επιστρέφει την καινούργια μνήμη με το καινούργιο αντίγραφο του s1. Η δεσμευμένη μνήμη χρειάζεται να αποδεσμευτεί στο τέλος με την free.
strlen /* strlen: return length of string s */ int strlen(char *s) { int n; for (n = 0; s[n] != '\0'; n++) ; return n; }
strlen /* strlen: return length of string s */ int strlen(char *s) { char *p = s; while (*p != '\0') p++; return p - s; }
strlen /* strlen: return length of string s */ int strlen(char *s) { int n; for (n = 0; *s != '\0'; s++) n++; return n; }
strcpy /* strcpy: copy t to s; array subscript version */ void strcpy(char *s, char *t) { int i; i = 0; while ((s[i] = t[i])!= '\0') i++; }
strcpy /* strcpy: copy t to s; pointer version */ void strcpy(char *s, char *t) { while ((*s = *t)!= '\0') { s++; t++; } }
Συναρτήσεις σύγκρισης (string.h) Σύγκριση αλφαριθμητικών Συγκρίνονται οι ASCII κώδικες των χαρακτήρων int strcmp( const char *s1, const char *s2 ); Συγκρίνει το s1 με το s2 Επιστρέφει: αρνητικό αριθμό αν s1 < s2, μηδέν αν s1 == s2, ή θετικό αν s1 > s2
Συναρτήσεις σύγκρισης (string.h) Σύγκριση αλφαριθμητικών Συγκρίνονται οι ASCII κώδικες των χαρακτήρων int strcmp( const char *s1, const char *s2 ); Συγκρίνει το s1 με το s2 Επιστρέφει: αρνητικό αριθμό αν s1 < s2, μηδέν αν s1 == s2, ή θετικό αν s1 > s2 Αλφαριθμητική σειρά, με βάση το ASCII. Κάθε ψηφίο/χαρακτήρας του string είναι ένα ψηφίο του αριθμού/αλφαριθμητικού στο 256-δικό σύστημα.
strcmp /* strcmp: return <0 if s<t, 0 if s==t, >0 if s>t */ int strcmp(char *s, char *t) { int i; for (i = 0; s[i] == t[i]; i++) if (s[i] == '\0') return 0; return s[i] - t[i]; }
strcmp /* strcmp: return <0 if s<t, 0 if s==t, >0 if s>t */ int strcmp(char *s, char *t) { for ( ; *s == *t; s++, t++) if (*s == '\0') return 0; return *s - *t; }
char myname [ 21 ] = "Huang" ; char yourname [ 21 ] ; if ( myname == yourname ) // compares addresses only! { // That is, 4000 and 6000 here. } // DOES NOT COMPARE CONTENTS! 4000 myname [0] H u a n g \0... 6000 yourname [0] H e a d i n g t o n \0...
char myname [ 21 ] = "Huang" ; char yourname [ 21 ] ; strcpy ( yourname, myname ) ; // changes string yourname // OVERWRITES CONTENTS! 4000 myname [0] H u a n g \0... 6000 yourname [0] H u a n g \0 g t o n \0...
The string Class in C++ C++ has a <string> library Include it in your programs when you wish to use strings: #include <string> In this library, a class string is defined and implemented
Declaration of strings The following instructions are all equivalent. They declare x to be an object of type string, and assign the string "high school" to it: string x("high school"); string x = "high school"; string x; x = "high school";
Operations on strings (Concatenation) Let x and y be two strings To concatenate x and y, write: x+y string x="high"; string y="school"; string z; z=x+y; cout<<"z="<<z<<endl; z=z+" was fun"; cout<<"z="<<z<<endl; Output: z=highschool z=highschool was fun
Concatenation of Mixed-Style Strings In s=u+v+w; where s is of type string, u can be: a string object, or a C-style string (a char array or a char pointer), a C-style char, or a double-quoted string, or a single-quoted character. Same with v and w. At least u or v must be a string object, because u+v is evaluated first (once u+v is a string object, w can be of any of the above kinds)
Παράδειγμα string x="high"; char y[]="school"; char z[]={'w','a','s','\0'}; char *p = "good"; string s= x+y+' '+z+" very"+" "+p+'!'; cout<<"s="<<s<<endl; cout<<"s="+s<<endl; Output: s=highschool was very good! s=highschool was very good!
The concat-assign Operator += Assume x is a string object. The statement x += y; is equivalent to x=x+y; where y can be a string object, a C-style string variable, a char variable, a double-quoted string, or a single-quoted char.
Comparison Operators for string Objects We can compare two strings x and y using the following operators: ==,!=, <, <=, >, >= The comparison is alphabetical (ASCII) The outcome of each comparison is: true or false The comparison works as long as at least x or y is a string object. The other string can be a string object, a C-style string variable, or a double-quoted string.
Example of String Comparisons string x="high"; char y[]="school"; char *p = "good"; if (x<y) cout<<"x<y"<<endl; if (x<"tree") cout<<"x<tree"<<endl; if ("low" != x) cout<<"low != x"<<endl; if (p>x) cout<<"p>x"<<endl; else cout<<"p<=x"<<endl; Output: x<y x<tree low != x p<=x
The Index Operator [] If x is a string object, and you wish to obtain the value of the k-th character in the string, you write: x[k]; This feature makes string objects appear like arrays of chars. string x="high"; char c=x[0]; // c is 'h' c=x[1]; // c is 'i' c=x[2]; // c is 'g'
Getting a string Object Length & Checking for Emptiness To obtain the length of a string object x, call the method length() or size(): int len=x.length( ); --or-- int len=x.size( ); To check if x is empty (that is, has no characters in it): bool x.empty();
Obtaining Substrings of Strings Logically, a substring of a string x is a subsequence of consecutive characters in x For example, rod is a substring of product If x is a string object, and we want the substring that begins at position pos and has len characters (where pos and len are of type int), write: string y = x.substr(pos,len); The default value of len is x.length( ) string y = x.substr(pos);//x[pos..end-1] The default value for pos is 0
Inserting a String Inside Another Suppose x is a string object, and let y be another string to be inserted at position pos of the string of x To insert y, do: x.insert(pos,y); The argument y can be: a string object, a C-style string variable, or a double-quoted string
Replacing a Substring by Another Suppose x is a string object, and suppose you want to replace the characters in the range [pos,pos+len) in x by a string y. To do so, write: x.replace(pos,len,y); The argument y can be: a string object, a C-style string variable, or a double-quoted string
Deleting (Erasing) a Substring of a string Object Suppose x is a string object, and suppose you want to delete/erase the characters in the range [pos,pos+len) in x. To do so, write: x.erase(pos,len); The default value of len is x.length( ): x.erase(pos); // erases x[pos..end-1] The default value for pos is 0 To erase the whole string of x, do: x.clear( );
Searching for (and Finding) Patterns in Strings Suppose x is a string object, and suppose you want to search for a string y in x. To do so, write: int startloc = x.find(y); This method returns the starting index of the leftmost occurrence of y in x, if any occurrence exists; otherwise, the method returns the special value string::npos. To search starting from a position pos, do int startloc = x.find(y, pos);
Searching for Patterns (Contd.) In all the versions of find and rfind, the argument y can be a string object, a C-style string variable, double-quoted string, a char variable, or a singlequoted char. startloc = x.rfind(y); // or startloc = x.rfind(y, pos);
An Example string x="FROM:ayoussef@gwu.edu"; int colonpos=x.find(":"); string prefix=x.substr(0,colonpos); //=FROM string suffix = x.substr(colonpos+1); cout<<"-This message is from "<<suffix<<endl; Output: -This message is from ayoussef@gwu.edu
Trimming Leading & Trailing Spaces // this function removes leading and trailing spaces from x void trim (string& x){ int k = 0; // k will proceed to the first non-blank char while(k<x.size() && (x[k]==' ' || x[k]=='\t' || x[k]=='\n')) k++; x.erase(0,k); int s=x.size(); // s will move backward to the rightmost non-blank char while(s>0 && (x[s-1]==' ' || x[s-1]=='\t' || x[s-1]=='\n')) s--; x.erase(s); }
Correspondence between the C library and the C++ string Class C Library Functions -> C++ string operators/methods: strcpy -> = (the assignment operator); strcat -> += (assign+concat operator); strcmp -> ==, !=, <, >, <=, >=; strchr, strstr -> .find( ) method; strrchr -> .rfind( ) method; strlen -> .size( ) or .length( ) methods
Char Functions in C (and C++) The <ctype.h> library in C provides useful functions for single char variables The next slide gives the most common char functions. Although the input argument appears to be of type int, it is actually a char.
Διαχείριση Αλφαριθμητικών Βρίσκονται στο <stdlib.h> Μετατρέπουν αλφαριθμητικά (αν είναι κατάλληλα) σε αριθμητικές τιμές Prototype - Description: double atof( const char *nptr ) - Converts the string nptr to double. int atoi( const char *nptr ) - Converts the string nptr to int. long atol( const char *nptr ) - Converts the string nptr to long int.
Συναρτήσεις Ανάγνωσης και Εκτύπωσης (C) Βρίσκονται στο <stdio.h> int sprintf(char *s, const char *format, ...) Ισοδύναμη με την printf μόνο που η έξοδος είναι στο string s και όχι στην οθόνη int sscanf(char *s, const char *format, ...) Ισοδύναμη με την scanf μόνο που η είσοδος είναι από το string s και όχι από το πληκτρολόγιο char s[100]; char f[] = "1.45 2.2 0.12"; float t1,t2,t3; sprintf(s,"%s",f); sscanf(s,"%f %f %f",&t1,&t2,&t3); printf("s = %s\nt1 = %f t2 = %f t3 = %f\n",s,t1,t2,t3);
String literals Evaluating "dog" results in memory allocated for three characters 'd', 'o', 'g', plus terminating NUL char *m = "dog"; Note: If m is an array name, subtle difference: char m[10] = "dog"; 10 bytes are allocated for this array This is not a string literal; It's an array initializer in disguise! Equivalent to { 'd', 'o', 'g', '\0' }
String manipulation functions Read some source string(s), possibly write to some destination location char *strcpy(char *dst, char const *src); char *strcat (char *dst, char const *src); Programmer's responsibility to ensure that: destination region large enough to hold result source, destination regions don't overlap undefined behavior in this case according to C spec, anything could happen! char m[10] = "dog"; strcpy(m+1, m); Assuming that the implementation of strcpy starts copying left-to-right without checking for the presence of a terminating NUL first, what will happen?
strlen() and size_t size_t strlen(char const *string); /* returns length of string */ size_t is an unsigned integer type, used to define sizes of strings and (other) memory blocks Reasonable to think of size as unsigned... But beware! Expressions involving strlen() may be unsigned (perhaps unexpectedly) if (strlen(x) - strlen(y) >= 0)... always true! avoid by casting: ((int) (strlen(x) - strlen(y)) >= 0) Problem: what if x or y is a very large string? a better alternative: (strlen(x) >= strlen(y))
strcmp() string comparison int strcmp(char const *s1, char const *s2); returns a value less than zero if s1 precedes s2 in lexicographical order; returns zero if s1 and s2 are equal; returns a value greater than zero if s1 follows s2. Source of a common mistake: seems reasonable to assume that strcmp returns true (nonzero) if s1 and s2 are equal; false (zero) otherwise In fact, exactly the opposite is the case!
Restricted vs. unrestricted string functions Restricted versions: require an extra integer argument that bounds the operation char *strncpy(char *dst, char const *src, size_t len); char *strncat(char *dst, char const *src, size_t len); int strncmp(char const *s1, char const *s2, size_t len); safer in that they avoid problems with missing NUL terminators safety concern with strncpy: If bound isn't large enough, terminating NUL won't be written Safe alternative: strncpy(buffer, name, BSIZE); buffer[BSIZE-1] = '\0'; - SUBSTRINGS
String searching char *strpbrk(char const *str, char const *group); /* return a pointer to the first character in str that matches *any* character in group; return NULL if there is no match */ size_t strspn(char const *str, char const *group); /* return number of characters at beginning of str that match *any* character in group */ Ο pointer δείχνει «μέσα» στο αλφαριθμητικό. Χρειαζόμαστε ωστόσο να θυμόμαστε, με άλλο pointer, και την αρχή του αλφαριθμητικού.
strtok string tokenizer char *strtok(char *s, char const *delim); /* delim contains all possible delimiters: characters that separate tokens. if s is non-NULL: return ptr to beginning of first token in s, and terminate token with NUL. if s is NULL: use remainder of untokenized string from the last call to strtok */
strtok in action for ( token = strtok(line, whitespace); token != NULL; token = strtok(NULL, whitespace)) printf("Next token is %s\n", token); d o g NUL c a t NUL NUL line NUL token
An implementation of strtok char* strtok(char *s, const char *delim) { static char *old = NULL; /* old contains the remains of an earlier s value (note use of static) */ char *token; if (! s) { /* NULL has been passed in for s, so consult old */ s = old; if (! s) return NULL; } if (s) { s += strspn(s, delim); /* strspn returns number of delimiters at beginning of s; skip past these characters */ if (*s == 0) { old = NULL; return NULL; } } token = s; s = strpbrk(s, delim); /* strpbrk gives the position of the next delimiter. s is updated to this position, but token still points to the token to return. */ if (s == NULL) old = NULL; else { *s = 0; old = s + 1; } return token; }
main( ) { char name1[12],name2[12],mixed[25]; char title[20]; strcpy(name1,"rosalinda"); strcpy(name2,"zeke"); strcpy(title,"this is the title."); printf(" %s\n\n",title); printf("name 1 is %s\n",name1); printf("name 2 is %s\n",name2); if (strcmp(name1,name2)>0) /* strcmp returns a positive value if name1 > name2 */ strcpy(mixed,name1); else strcpy(mixed,name2); printf("the biggest name alphabetically is %s\n",mixed); strcpy(mixed,name1); strcat(mixed," "); strcat(mixed,name2); printf("both names are %s\n",mixed); }
Array of strings
The strchr function char *strchr(const char *s, int c); The strchr() function shall locate the first occurrence of c (converted to a char) in the string pointed to by s. The terminating null byte is considered to be part of the string. The function returns the location of the found character, or a null pointer if the character was not found. #include <string.h> /* strchr */ char *(strchr)(const char *s, int c) { /* Scan s for the character. When this loop is finished, s will either point to the end of the string or the character we were looking for. */ while (*s != '\0' && *s != (char)c) s++; return ( (*s == c) ? (char *) s : NULL ); }
The strstr function char *strstr(const char *haystack, const char *needle); The strstr() function shall locate the first occurrence in the string pointed to by haystack of the sequence of bytes (excluding the terminating null byte) in the string pointed to by needle. The function returns the pointer to the matching string in haystack or a null pointer if a match is not found. If needle is an empty string, the function returns haystack. #include <string.h> /* strstr */ char *(strstr)(const char *haystack, const char *needle) { size_t needlelen; /* Check for the null needle case. */ if (*needle == '\0') return (char *) haystack; needlelen = strlen(needle); for (; (haystack = strchr(haystack, *needle)) != NULL; haystack++) if (strncmp(haystack, needle, needlelen) == 0) return (char *) haystack; return NULL; }