ROSE  0.9.6a
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
string_functions.h
Go to the documentation of this file.
1 #ifndef ROSE_STRING_UTILITY_H
2 #define ROSE_STRING_UTILITY_H
3 
4 // Move this to rose.h or build a rose_utility.h file later
6 #include <vector>
7 #include <map>
8 #include <string>
9 #include <sstream>
10 #include <stdint.h>
11 #if ROSE_MICROSOFT_OS
12 // This is the boost solution for lack of support for stdint.h (e.g. types such as "uint64_t")
13 #include <msvc_stdint.h>
14 #else
15 #endif
16 
17 // DQ (2/22/2014): Used below to control use of __int128 type.
18 // However, this file can't be included here and must be included
19 // in the C source file calling this header file.
20 // #include "rose_config.h"
21 
22 // extern const char** roseGlobalVariantNameList;
23 //Rama: 12/14/06: Changed the class to namespace and removed 'static'ness of the erstwhile "member functions"
24 //There is still a lot of clean up to be done, such as:
25 // reorganizing the functions
26 // making a single utilities namespace for ROSE with different functionalities,
27 // including the functions in the util directory.
28 
29 namespace StringUtility
30  {
31  // Container class for numerous string utility functions that are useful in different parts of the ROSE project.
32  // Rama (12/22/2006): Clearly, some of the following are not string utilities, but file utilities.
33  // This class is the wrapper around the realpath of UNIX
34 
35  ROSE_UTIL_API std::string getAbsolutePathFromRelativePath ( const std::string & relativePath, bool printErrorIfAny = false); // Real declaration is below
36 
38  {
39  std::string str; // DQ (1/23/2010): this name is difficult to trace within the code.
40  std::string filename; // Empty string means generated code
41  unsigned int line;
42 
43  StringWithLineNumber(const std::string& str, const std::string& filename, unsigned int line): str(str), filename(filename), line(line) {}
44 
45  ROSE_UTIL_API std::string toString() const;
46  };
47 
48 #ifndef USE_ROSE
49  typedef std::vector<StringWithLineNumber> FileWithLineNumbers;
50 #else
51  // workaround of bug 315, separating definitions for a namespace
52  // Liao, 2/16/2009
53  }
54 
55 namespace StringUtility
56  {
57  typedef std::vector<StringUtility::StringWithLineNumber> FileWithLineNumbers;
58 #endif
59 
60  inline std::ostream& operator<<(std::ostream& os, const StringWithLineNumber& s) {
61  os << s.toString();
62  return os;
63  }
64 
65  ROSE_UTIL_API std::string toString(const FileWithLineNumbers& strings, const std::string& filename = "<unknown>", int line = 1);
66 
68  a.insert(a.end(), b.begin(), b.end());
69  return a;
70  }
71 
73  FileWithLineNumbers f = a;
74  f += b;
75  return f;
76  }
77 
78 #if 0
79  inline std::ostream& operator<<(std::ostream& os, const FileWithLineNumbers& f) {
80  os << StringUtility::toString(f);
81  return os;
82  }
83 #endif
84 
85  inline FileWithLineNumbers& operator<<(FileWithLineNumbers& f, const std::string& str) {
86  // Add loose text to the output file
87  if (!f.empty() && f.back().filename == "") {
88  f.back().str += str;
89  } else {
90  f.push_back(StringWithLineNumber(str, "", 1));
91  }
92  return f;
93  }
94 
95  inline FileWithLineNumbers& operator<<(FileWithLineNumbers& f, const char* str) {
96  f << std::string(str);
97  return f;
98  }
99 
100 #if 0
101  // enum VariantT;
103  std::string getVariantName ( int v );
104 #endif
105 
107  ROSE_UTIL_API void writeFile ( const std::string& outputString, const std::string& fileNameString, const std::string& directoryName );
108 
110  ROSE_UTIL_API std::string readFile ( const std::string& fileName );
111 
113  ROSE_UTIL_API FileWithLineNumbers readFileWithPos(const std::string& fileName);
114 
122  ROSE_UTIL_API std::string copyEdit ( const std::string& inputString, const std::string & oldToken, const std::string & newToken );
125  ROSE_UTIL_API std::string numberToString ( long long x );
126  ROSE_UTIL_API std::string numberToString ( unsigned long long x );
127  ROSE_UTIL_API std::string numberToString ( long x );
128  ROSE_UTIL_API std::string numberToString ( unsigned long x );
129  ROSE_UTIL_API std::string numberToString ( int x );
130  ROSE_UTIL_API std::string numberToString ( unsigned int x );
132  ROSE_UTIL_API std::string intToHex(uint64_t i);
134  // string numberToString ( unsigned int x );
135  // ROSE_UTIL_API std::string numberToString ( size_t x );
136 
137 #ifndef _MSC_VER
138 // #if !defined(__STRICT_ANSI__) && defined(_GLIBCXX_USE_INT128)
139 // #if ((BACKEND_CXX_COMPILER_MAJOR_VERSION_NUMBER == 4) && (BACKEND_CXX_COMPILER_MINOR_VERSION_NUMBER > 6))
140  #if (defined(BACKEND_CXX_IS_GNU_COMPILER) && (((BACKEND_CXX_COMPILER_MAJOR_VERSION_NUMBER == 4) && (BACKEND_CXX_COMPILER_MINOR_VERSION_NUMBER > 6)) || (BACKEND_CXX_COMPILER_MAJOR_VERSION_NUMBER > 4)))
141  // DQ (2/22/2014): Required code for GNU versions greater than 4.6.
142  ROSE_UTIL_API std::string numberToString ( __int128 x );
143  ROSE_UTIL_API std::string numberToString ( unsigned __int128 x );
144  #endif
145 #endif
146 
147  // DQ (8/10/2010): Changed to take parameter as const.
149  ROSE_UTIL_API std::string numberToString ( const void* x );
150 
152  ROSE_UTIL_API std::string numberToString ( double x );
153 
155  // indicates the number of significant bits.
156  ROSE_UTIL_API std::string addrToString(uint64_t x, size_t nbits=32);
157 
159  ROSE_UTIL_API std::string indentMultilineString ( const std::string& inputString, int statementColumnNumber );
160 
162  ROSE_UTIL_API std::string listToString ( const std::list<int> & X, bool separateStrings = false );
164  ROSE_UTIL_API std::string listToString ( const std::list<std::string> & X, bool separateStrings = false );
166  ROSE_UTIL_API std::list<std::string> stringToList ( const std::string & X );
167 
169  ROSE_UTIL_API std::string listToString ( const std::vector<std::string> & X, bool separateStrings = false );
170 
171  ROSE_UTIL_API std::list<std::string> tokenize ( std::string X, char delim );
172 
174  ROSE_UTIL_API std::string removeRedundentSubstrings ( std::string X ); // sic
176  ROSE_UTIL_API std::string removePseudoRedundentSubstrings ( std::string X ); // sic
178  ROSE_UTIL_API void add_to_reason_string(std::string &result, bool isset, bool do_pad,
179  const std::string &abbr, const std::string &full);
191  // int isSameName ( const std::string& s1, const std::string& s2 );
193 
194  // char* stringDuplicate ( const char* tempString );
195  ROSE_UTIL_API std::string copyEdit ( const std::string& inputString, const std::string& oldToken, const std::string& newToken );
196  ROSE_UTIL_API FileWithLineNumbers copyEdit ( const FileWithLineNumbers& inputString, const std::string& oldToken, const std::string& newToken );
197  ROSE_UTIL_API FileWithLineNumbers copyEdit ( const FileWithLineNumbers& inputString, const std::string& oldToken, const FileWithLineNumbers& newToken );
198  // bool isContainedIn ( const char* longString, const char* shortString );
// Returns true when shortString occurs somewhere inside longString.
// An empty shortString is found in every longString (including an empty one).
inline bool isContainedIn ( const std::string & longString, const std::string & shortString ) {
    const std::string::size_type where = longString.find(shortString);
    return std::string::npos != where;
}
202 
203 
205  // char* stringConcatinate ( const char* targetString , const char* endingString );
206 
208  void splitStringIntoStrings( const std::string& inputString, char separator, std::vector<std::string>& stringList );
211  // unsigned short int chksum(char *buffer, int len);
213  unsigned long generate_checksum( std::string s );
214 
216  ROSE_UTIL_API std::string convertToLowerCase( const std::string & inputString );
217 
218  // std::string mangledName ( std::string s );
219 
221  bool popen_wrapper ( const std::string & command, std::vector<std::string> & result );
222 
224  std::string demangledName ( std::string s );
225 
226 //--------------------------------------------------------------
228 
232  // DQ (3/5/2006): Copies from ROSE class (deprecated in their previous location)
234  ROSE_UTIL_API std::string stripPathFromFileName ( const std::string & fileNameWithPath );
236  ROSE_UTIL_API std::string getPathFromFileName ( const std::string & fileNameWithPath );
238  ROSE_UTIL_API std::string stripFileSuffixFromFileName ( const std::string & fileNameWithSuffix );
240  ROSE_UTIL_API std::string getAbsolutePathFromRelativePath ( const std::string & relativePath, bool printErrorIfAny /* = false */ );
242  // If no dot is found in the input fileName, the function just returns the original fileName
243  ROSE_UTIL_API std::string fileNameSuffix ( const std::string & fileName );
244 
245  // True only if this is a valid C++ source file name extension (suffix). Duplicate of CommandlineProcessing::isCppFileNameSuffix().
246 // bool isCppFileNameSuffix ( const std::string & fileName );
247 
249  /*
250  * The function
251  * findfile
252  * traverse the current directory, searching
253  * for files with a given string in their name.
254  * input: string to match and directory to match it in.
255  * output: any file found, returned as a list of strings with a full path.
256  */
257  std::list<std::string> findfile(std::string patternString, std::string pathString);
258 
260  ROSE_UTIL_API std::string escapeNewLineCharaters ( const std::string & X );
261 
262  // RSS 7/1/2008 New functionality to support filename processing
263  enum OSType
264  {
270 
271  // Return OSType based on uname kernel name results
273 
274  // Populate homeDir from $HOME environment var
275  void homeDir(std::string& homeDir);
276 
277  /* Files can be classified as being in one of the following
278  * locations: (1) we don't know if it's user or system; (2) it is
279  * a user (application) file; (3) it is a system library; or
280  * (4) this file does not exist. */
282  {
287 
288  /* Files can be classified as being part of one of these
289  * libraries: Unknown, it isn't a library - it's part of
290  * the user application, or any of the libraries that the
291  * enum values imply, this list will likely be added to
292  * over time */
293  /*
294  enum FileNameLibrary { FILENAME_LIBRARY_UNKNOWN,
295  FILENAME_LIBRARY_USER,
296  FILENAME_LIBRARY_C,
297  FILENAME_LIBRARY_STDCXX,
298  FILENAME_LIBRARY_STL,
299  FILENAME_LIBRARY_LINUX,
300  FILENAME_LIBRARY_GCC,
301  FILENAME_LIBRARY_BOOST,
302  FILENAME_LIBRARY_ROSE };
303  */
304 
305  static const std::string FILENAME_LIBRARY_UNKNOWN = "Unknown";
306  static const std::string FILENAME_LIBRARY_USER = "User";
307  static const std::string FILENAME_LIBRARY_C = "C";
308  static const std::string FILENAME_LIBRARY_STDCXX = "C++";
309  static const std::string FILENAME_LIBRARY_STL = "STL";
310  static const std::string FILENAME_LIBRARY_LINUX = "Linux";
311  static const std::string FILENAME_LIBRARY_GCC = "GCC";
312  static const std::string FILENAME_LIBRARY_BOOST = "Boost";
313  static const std::string FILENAME_LIBRARY_ROSE = "Rose";
314 
315  // CH (2/16/2010): Use this typedef to avoid following changes
316  typedef std::string FileNameLibrary;
317 
318  /* This is the return type of classifyFileName, which
319  * provides all the details it infers */
321  {
322  private:
324 
325  // CH (2/12/2010): Change 'library' type from enum to string to let user set it
327 
328  int distance;
329 
330  public:
332  const FileNameLibrary& lib,
333  int dist) : location(loc),
334  library(lib),
335  distance(dist)
336  {}
338  library("Unknown"),
339  distance(0)
340  {}
341 
342  /* Return the FileNameLocation which is described above
343  * with the definition of the enum */
345  { return location; }
346 
347  /* Return the FileNameLibrary which is described above
348  * with the definition of the enum */
350  { return library; }
351 
352  /* Return the "distance" of the filename from the
353  * appPath that was supplied during the call. The
354  * distance is defined as the number of cd's that only
355  * move up or down one directory that it would take to
356  * move from the directory of the filename to the
357  * directory that was given by appPath. This is
358  * intended as a heuristic to gauge whether or not one
359  * believes that the filename is related to the source
360  * (appPath) directory. Examples:
361  *
362  * Between /a/b/c/file.h and /a/b/d/e/ the distance is 3
363  * because one must cd ..; cd d; cd e; to get to appPath
364  *
365  * *EXCEPTION*: if the appPath is an ancestor of filename
366  * then the distance will be 0. The idea being that this
367  * filename is "in" the appPath somewhere and thus part
368  * of the application.
369  */
371  { return distance; }
372 
373  bool isUserCode() const
374  { return location == FILENAME_LOCATION_USER; }
375  bool isLibraryCode() const
376  { return location == FILENAME_LOCATION_LIBRARY; }
377 
378  /* Return a string name for the library indicated by
379  * getLibrary() */
380  std::string getLibraryName() const
381  { return library; }
382  };
383 
384  /* Given a fileName and an appPath that is a path to some
385  * application's source code directory, return a
386  * FileNameClassification indicating whether the fileName
387  * is part of the source code or some system library and
388  * automatically determine the operating system from the
389  * host uname */
390  ROSE_UTIL_API FileNameClassification classifyFileName(const std::string& fileName,
391  const std::string& appPath);
392 
393  /* Given a fileName and an appPath that is a path to some
394  * application's source code directory, return a
395  * FileNameClassification indicating whether the fileName
396  * is part of the source code or some system library */
397  ROSE_UTIL_API FileNameClassification classifyFileName(const std::string& fileName,
398  const std::string& appPath,
399  OSType os);
400 
401  /* Given a fileName and an appPath that is a path to some
402  * application's source code directory, and a collection
403  * of library paths, return a FileNameClassification
404  * indicating whether the fileName is part of the source
405  * code or some system library and automatically determine
406  * the operating system from the host uname */
407  ROSE_UTIL_API FileNameClassification classifyFileName(const std::string& fileName,
408  const std::string& appPath,
409  const std::map<std::string, std::string>& libPathCollection);
410 
411  /* Given a fileName and an appPath that is a path to some
412  * application's source code directory, and a collection
413  * of library paths, return a FileNameClassification
414  * indicating whether the fileName is part of the source
415  * code or some system library */
416  ROSE_UTIL_API FileNameClassification classifyFileName(const std::string& fileName,
417  const std::string& appPath,
418  const std::map<std::string, std::string>& libPathCollection,
419  OSType os);
420 
421  /* Remove leading dots plus a space from a header file name
422  * that is given in the format that g++ -H returns */
423  ROSE_UTIL_API const std::string
424  stripDotsFromHeaderFileName(const std::string& name);
425 
426  /* Essentially the edit distance without substitution in
427  * directory name tokens between two directories. Returns
428  * the "distance" between left and right. The distance is
429  * defined as the number of cd's that only move up or down
430  * one directory that it would take to move from the
431  * directory of the filename to the directory that was
432  * given by appPath. This is intended as a heuristic to
433  * gauge whether or not one believes that the left is
434  * related to the right directory. Examples:
435  *
436  * Between /a/b/c/file.h and /a/b/d/e/ the distance is 3
437  * because one must cd ..; cd d; cd e */
438  ROSE_UTIL_API int directoryDistance(const std::string& left,
439  const std::string& right);
440 
441 
442  /* Added htmlEscape necessary for QROSE work to this utility library - tps (9Oct2008) */
443  ROSE_UTIL_API std::string htmlEscape(const std::string& s);
444 
445  // DQ (2/3/2009): Moved this function from attach_all_info.C
446  ROSE_UTIL_API std::vector<std::string> readWordsInFile( std::string filename);
447 
457  ROSE_UTIL_API std::string toHex2(uint64_t value, size_t nbits,
458  bool show_unsigned_decimal=true, bool show_signed_decimal=true,
459  uint64_t decimal_threshold=256);
460  ROSE_UTIL_API std::string signedToHex2(uint64_t value, size_t nbits);
461  ROSE_UTIL_API std::string unsignedToHex2(uint64_t value, size_t nbits);
462 
463  template<typename T> std::string toHex(T value) { return toHex2((uint64_t)value, 8*sizeof(T)); }
464  template<typename T> std::string signedToHex(T value) { return signedToHex2((uint64_t)value, 8*sizeof(T)); }
465  template<typename T> std::string unsignedToHex(T value) { return unsignedToHex2((uint64_t)value, 8*sizeof(T)); }
482  ROSE_UTIL_API std::string appendAsmComment(const std::string &s, const std::string &comment);
483 
489  ROSE_UTIL_API std::string prefixLines(const std::string &lines, const std::string &prefix,
490  bool prefixAtFront=true, bool prefixAtBack=false);
491 
494  ROSE_UTIL_API bool isLineTerminated(const std::string &s);
495 
500  ROSE_UTIL_API std::string fixLineTermination(const std::string &input);
501 
510  ROSE_UTIL_API std::string makeOneLine(const std::string &s, std::string replacement=" ");
511 
516  std::string encode_base64(const std::vector<uint8_t> &data, bool do_pad=true);
517  std::string encode_base64(const uint8_t *data, size_t nbytes, bool do_padd=true);
521  std::vector<uint8_t> decode_base64(const std::string &encoded);
522 
// Concatenates the elements of the range [begin, end), inserting separator
// between consecutive elements (no leading or trailing separator). An empty
// range yields an empty string. Each dereferenced element must be appendable
// to a std::string.
//
// NOTE: join_range must be declared before join(). The call inside join() is a
// dependent call, so at instantiation time only argument-dependent lookup is
// performed, and that never searches this namespace when the arguments are
// std::string and standard container iterators.
template<class Iterator>
std::string join_range(const std::string &separator, Iterator begin, Iterator end) {
    std::string retval;
    for (Iterator i=begin; i!=end; ++i) {
        if (i!=begin)
            retval += separator;
        retval += *i;
    }
    return retval;
}

// Joins all elements of the container strings with separator between them.
// Equivalent to join_range(separator, strings.begin(), strings.end()).
template<class Container>
std::string join(const std::string &separator, const Container &strings) {
    return join_range(separator, strings.begin(), strings.end());
}
538  std::string join(const std::string &separator, char *strings[], size_t nstrings);
539  std::string join(const std::string &separator, const char *strings[], size_t nstrings);
549  ROSE_UTIL_API std::vector<std::string> split(const std::string &separator, const std::string &str, size_t maxparts=(size_t)(-1),
550  bool trim_white_space=false);
551  ROSE_UTIL_API std::vector<std::string> split(char separator, const std::string &str, size_t maxparts=(size_t)(-1),
552  bool trim_white_space=false);
553 
556  ROSE_UTIL_API std::string trim(const std::string &str, const std::string &strip=" \t\r\n", bool at_beginning=true, bool at_end=true);
557 
559  std::string untab(const std::string &str, size_t tabstops=8, size_t firstcol=0);
560 
588  template<class Container, class Stringifier>
589  std::vector<std::string> toStrings(const Container &numbers, const Stringifier &stringifier=numberToString) {
590  return toStrings_range(numbers.begin(), numbers.end(), stringifier);
591  }
592  template<class Iterator, class Stringifier>
593  std::vector<std::string> toStrings_range(Iterator begin, Iterator end, const Stringifier &stringifier=numberToString) {
594  std::vector<std::string> retval;
595  for (/*void*/; begin!=end; ++begin)
596  retval.push_back(stringifier(*begin));
597  return retval;
598  }
// Formats a count followed by a noun, e.g. "3 parties" or "1 party".
//
// n            the count; converted to text with numberToString(n).
// plural_word  the plural form of the noun; must not be empty (asserted).
//
// When n equals 1 a singular form is derived from plural_word with two simple
// rules: a word longer than three characters ending in "ies" has that ending
// replaced by "y"; otherwise a word longer than one character ending in 's'
// loses the final 's'. If neither rule applies the plural is used unchanged,
// which may be grammatically incorrect. For any other n the plural is used
// as given.
template<typename T>
std::string plural(T n, const std::string &plural_word) {
    assert(!plural_word.empty());
    std::string retval = numberToString(n) + " ";

    const std::string::size_type len = plural_word.size();
    if (1==n && len>3 && 0==plural_word.compare(len-3, 3, "ies")) {
        // "parties" -> "party"
        retval += plural_word.substr(0, len-3) + "y";
    } else if (1==n && len>1 && 's'==plural_word[len-1]) {
        // drop the trailing 's'
        retval += plural_word.substr(0, len-1);
    } else {
        // either n is not 1, or no singular rule matched
        retval += plural_word;
    }
    return retval;
}
636 
637 } // namespace
638 
639 #endif