During localization work, you’ll sometimes discover that the grammar is quite different from one language to another. What might be great for English actually needs to be changed extensively for another language.
For example, in English you might say “There are 10 apples and 20 oranges!” and to do that in code you might have something like this:
int num_apples = 10; int num_oranges = 20; sprintf(temp_string, "There are %d apples and %d oranges!", num_apples, num_oranges);
But in another language (when translated), it’s entirely possible for the string to change to “There are 20 oranges and 10 apples!” forcing you to change the code to something like this:
int num_apples = 10; int num_oranges = 20; sprintf(temp_string, "There are %d oranges and %d apples!", num_oranges, num_apples);
…causing both the string and the arguments to change.
What do you do if you have more than 2 languages to support?
You could create a switch() statement and handle it that way, but that’s really clunky and not extensible; consider what you’d have to do if you had to support 30+ languages. It suddenly becomes a Herculean task.
What if there was a way to just change the string and leave the arguments alone?
Introducing sprintf_locale()
sprintf_locale() is an ANSI-compliant extension to sprintf() I wrote that takes the headache out of formatting arguments for strings.
It’s really just a wrapper for sprintf() as the heavy-lifting is still done by the original sprintf(), so it handles all of the same formatting strings, specifications, etc. that sprintf() handles.
The real magic is how to tell it which argument you want to use. I do that with the ‘$n$’ extension (where n is the 0-based argument number that applies to this part of the string).
Here’s the original line of code:
sprintf(temp_string, "There are %d oranges and %d apples!", num_oranges, num_apples);
To convert it to the new extension, you would change it to:
sprintf_locale(temp_string, "There are %$0$d apples and %$1$d oranges!", num_apples, num_oranges);
$0$ is the ID for the 0th argument (num_apples) and $1$ is the ID for num_oranges.
With that in mind, you’re able to do something like this:
sprintf_locale(temp_string, "There are %$1$d oranges and %$0$d apples!", num_apples, num_oranges);
Do you see the difference on line 2? The string has changed but the actual arguments are the same.
It doesn’t matter where you put the ‘$n$’ in the spec, but it’s a good habit to put it immediately after the %.
It also doesn’t matter that I’m using $ for the start and end delimiters. You can still put a $ anywhere in your string because the $n$ extension happens between the % and the spec type (d, s, etc.).
It’s Optional!
What makes this extension extra cool is that the new behavior is completely optional; you can totally ignore it if you wish. To use the ‘normal’ sprintf() behavior, just use it exactly the same as sprintf() (that is, don’t specify any $n$), like so:
sprintf_locale(temp_string, "There are %d apples and %d oranges!", num_apples, num_oranges);
Example Usage
char temp_string[1024]; int num_apples = 10; int num_oranges = 20; sprintf_locale(temp_string, "There are %$0$d apples and %$1$d oranges!", num_apples, num_oranges); printf("string: '%s'\n", temp_string); sprintf_locale(temp_string, "There are %$1$d oranges and %$0$d apples!", num_apples, num_oranges); printf("string: '%s'\n", temp_string); // 'mixed format' example sprintf_locale(temp_string, "There are %$1$d oranges and 10 %$0$s!", "apples", num_oranges); printf("string: '%s'\n", temp_string);
Gives you this output:
string: 'There are 10 apples and 20 oranges!' string: 'There are 20 oranges and 10 apples!' string: 'There are 20 oranges and 10 apples!'
sprintf_locale.cpp
Here is the source to the full routine (code is BSD License, use at your own risk).
Note: The 64 on lines 45 and 81 is arbitrary. This specifies how long each spec string can be (64 is quite a long one, so should be fine).
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stddef.h>
#include <stdint.h>

enum {
    SL_SPEC_MAX = 64,   // capacity of one rebuilt conversion spec, including its NUL
    SL_ARGS_MAX = 32    // number of variadic argument slots that can be addressed
};

// How one variadic argument slot has to be fetched with va_arg().
typedef enum {
    SL_ARG_UNUSED = 0,  // slot not referenced by any conversion
    SL_ARG_INT,
    SL_ARG_LONG,
    SL_ARG_LLONG,
    SL_ARG_INTMAX,
    SL_ARG_SIZE,
    SL_ARG_PTRDIFF,
    SL_ARG_DOUBLE,
    SL_ARG_LDOUBLE,
    SL_ARG_STRING,
    SL_ARG_POINTER,
    SL_ARG_COUNT_OUT    // the int * target of %n
} sl_arg_kind;

// Storage for one fetched argument; the member in use is selected by sl_arg_kind.
typedef union {
    int         i;
    long        l;
    long long   ll;
    intmax_t    j;
    size_t      z;
    ptrdiff_t   t;
    double      d;
    long double ld;
    char *      s;
    void *      p;
    int *       n;
} sl_arg_value;

// One parsed conversion specification.
typedef struct {
    char        text[SL_SPEC_MAX];  // the spec with any $n$ removed, ready for sprintf()
    int         arg_number;         // value of $n$, or -1 when the spec has none
    sl_arg_kind kind;               // type of the argument the spec consumes
    size_t      consumed;           // format characters covered, including the '%'
} sl_spec;

// Maps a conversion character plus the length modifiers seen so far to an
// argument kind. Returns SL_ARG_UNUSED when the character does not end a spec.
static sl_arg_kind sl_classify(char conversion, int long_count, char modifier)
{
    switch (conversion) {
    case 'c': case 'C':
        // NOTE(review): wide-character forms are fetched as int, as the
        // original routine did.
        return SL_ARG_INT;
    case 'd': case 'i': case 'o': case 'u': case 'x': case 'X':
        if (long_count >= 2) { return SL_ARG_LLONG; }
        if (1 == long_count) { return SL_ARG_LONG; }
        if ('j' == modifier) { return SL_ARG_INTMAX; }
        if ('z' == modifier) { return SL_ARG_SIZE; }
        if ('t' == modifier) { return SL_ARG_PTRDIFF; }
        return SL_ARG_INT;  // also covers h / hh: those arguments are promoted to int
    case 'a': case 'A': case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
        return ('L' == modifier) ? SL_ARG_LDOUBLE : SL_ARG_DOUBLE;
    case 'n':
        return SL_ARG_COUNT_OUT;
    case 'p':
        return SL_ARG_POINTER;
    case 's': case 'S':
        // NOTE(review): wide strings are fetched as char *, as the original did.
        return SL_ARG_STRING;
    default:
        return SL_ARG_UNUSED;
    }
}

// Parses the conversion spec that begins at the '%' pointed to by start.
// Returns 1 when a complete spec was parsed into *out, 0 when the format
// string ends before the conversion character, and -1 when the spec is
// malformed (unterminated or digit-less $n$, argument number out of range,
// '*' width/precision, or a spec too long for SL_SPEC_MAX).
static int sl_parse_spec(const char * start, sl_spec * out)
{
    const char * p = start + 1;     // skip the '%'
    size_t len = 0;
    int long_count = 0;
    char modifier = 0;

    out->text[len++] = '%';
    out->text[len] = '\0';
    out->arg_number = -1;
    out->kind = SL_ARG_UNUSED;
    out->consumed = 0;

    for (;;) {
        const char c = *p;
        if ('\0' == c) {
            return 0;
        }
        p++;

        if ('$' == c) {
            // $n$ extension: gather the digits up to the closing '$'.
            // Non-digit characters in between are ignored.
            int number = 0;
            int digit_count = 0;
            for (;;) {
                const char d = *p;
                if ('\0' == d) {
                    return -1;      // no closing '$'
                }
                p++;
                if ('$' == d) {
                    break;
                }
                if (isdigit((unsigned char)d)) {
                    number = (number * 10) + (d - '0');
                    if (number >= SL_ARGS_MAX) {
                        return -1;  // checked per digit, so this cannot overflow
                    }
                    digit_count++;
                }
            }
            if (0 == digit_count) {
                return -1;
            }
            out->arg_number = number;
            continue;
        }

        if ('*' == c) {
            // A '*' width/precision would consume an extra argument; unsupported.
            return -1;
        }

        if ((len + 1) >= (size_t)SL_SPEC_MAX) {
            return -1;
        }
        out->text[len++] = c;
        out->text[len] = '\0';

        if ('l' == c) {
            long_count++;
        } else if (('j' == c) || ('z' == c) || ('t' == c) || ('L' == c)) {
            modifier = c;
        } else {
            const sl_arg_kind kind = sl_classify(c, long_count, modifier);
            if (SL_ARG_UNUSED != kind) {
                out->kind = kind;
                out->consumed = (size_t)(p - start);
                return 1;
            }
        }
    }
}

// First pass: records in kinds[] the type every referenced argument slot
// needs. Returns the number of slots to fetch, or -1 when the format is
// malformed or one slot is referenced with two different types.
static int sl_collect_kinds(const char * format, sl_arg_kind * kinds)
{
    const char * p = format;
    int count = 0;
    int next_index = 0;     // slot used by the next spec without $n$
    int i;

    for (i = 0; i < SL_ARGS_MAX; i++) {
        kinds[i] = SL_ARG_UNUSED;
    }

    while ('\0' != *p) {
        sl_spec spec;
        int status;
        int index;

        if ('%' != *p) {
            p++;
            continue;
        }
        if ('%' == p[1]) {
            p += 2;         // "%%" is a literal percent sign
            continue;
        }

        status = sl_parse_spec(p, &spec);
        if (status < 0) {
            return -1;
        }
        if (0 == status) {
            break;          // incomplete trailing spec; nothing more to collect
        }

        index = (spec.arg_number >= 0) ? spec.arg_number : next_index;
        if (index >= SL_ARGS_MAX) {
            return -1;
        }
        if ((SL_ARG_UNUSED != kinds[index]) && (spec.kind != kinds[index])) {
            return -1;
        }
        kinds[index] = spec.kind;

        // As in the original routine, a spec without $n$ continues with the
        // argument after the most recently used one.
        next_index = index + 1;
        if (next_index > count) {
            count = next_index;
        }
        p += spec.consumed;
    }

    // A skipped slot has no conversion to reveal its type.
    // NOTE(review): it is assumed to be an int; callers should reference
    // every argument up to the highest one used.
    for (i = 0; i < count; i++) {
        if (SL_ARG_UNUSED == kinds[i]) {
            kinds[i] = SL_ARG_INT;
        }
    }
    return count;
}

// Second pass: reads each argument exactly once, with its own type.
static void sl_fetch_args(va_list args, const sl_arg_kind * kinds, int count, sl_arg_value * values)
{
    int i;
    for (i = 0; i < count; i++) {
        switch (kinds[i]) {
        case SL_ARG_LONG:      values[i].l  = va_arg(args, long);        break;
        case SL_ARG_LLONG:     values[i].ll = va_arg(args, long long);   break;
        case SL_ARG_INTMAX:    values[i].j  = va_arg(args, intmax_t);    break;
        case SL_ARG_SIZE:      values[i].z  = va_arg(args, size_t);      break;
        case SL_ARG_PTRDIFF:   values[i].t  = va_arg(args, ptrdiff_t);   break;
        case SL_ARG_DOUBLE:    values[i].d  = va_arg(args, double);      break;
        case SL_ARG_LDOUBLE:   values[i].ld = va_arg(args, long double); break;
        case SL_ARG_STRING:    values[i].s  = va_arg(args, char *);      break;
        case SL_ARG_POINTER:   values[i].p  = va_arg(args, void *);      break;
        case SL_ARG_COUNT_OUT: values[i].n  = va_arg(args, int *);       break;
        default:               values[i].i  = va_arg(args, int);         break;
        }
    }
}

// Formats one value at dest with the real sprintf() and returns its result.
static int sl_format_one(char * dest, const sl_spec * spec, const sl_arg_value * value)
{
    switch (spec->kind) {
    case SL_ARG_LONG:    return sprintf(dest, spec->text, value->l);
    case SL_ARG_LLONG:   return sprintf(dest, spec->text, value->ll);
    case SL_ARG_INTMAX:  return sprintf(dest, spec->text, value->j);
    case SL_ARG_SIZE:    return sprintf(dest, spec->text, value->z);
    case SL_ARG_PTRDIFF: return sprintf(dest, spec->text, value->t);
    case SL_ARG_DOUBLE:  return sprintf(dest, spec->text, value->d);
    case SL_ARG_LDOUBLE: return sprintf(dest, spec->text, value->ld);
    case SL_ARG_STRING:  return sprintf(dest, spec->text, value->s);
    case SL_ARG_POINTER: return sprintf(dest, spec->text, value->p);
    default:             return sprintf(dest, spec->text, value->i);
    }
}

// sprintf() with an optional $n$ extension that selects which argument a
// conversion uses, so a translated format string can reorder its values
// without the call site changing:
//
//     sprintf_locale(buf, "%$1$d oranges and %$0$d apples", apples, oranges);
//
// n is the 0-based argument number and may appear anywhere between the '%'
// and the conversion character. A conversion without $n$ uses the argument
// after the most recently used one (the first argument at the start), so a
// format with no $n$ at all behaves like plain sprintf().
//
// The work is done in three steps: the format is scanned to learn the type
// of every argument slot, the arguments are then read once each with their
// own type, and finally each conversion is handed to sprintf(). Reading the
// arguments with their real types is what makes mixed-type reordering safe.
//
// buffer  destination; must be large enough for the result (no bound is checked)
// format  format string; conversions c C d i o u x X a A e E f F g G n p s S,
//         with the length modifiers h hh l ll j z t L
//
// Returns the number of characters written, excluding the terminating NUL,
// or -1 when:
//   - buffer or format is NULL, or format is empty (buffer is left untouched);
//   - a $n$ is unterminated, has no digits, or is >= SL_ARGS_MAX;
//   - a conversion uses '*' or is longer than SL_SPEC_MAX - 1 characters;
//   - one argument is referenced by conversions of different types;
//   - sprintf() reports an error.
// Format errors are found before any output is produced; buffer then holds
// an empty string. A conversion cut off by the end of the format string is
// dropped from the output.
int sprintf_locale(char * buffer, const char * format, ...)
{
    sl_arg_kind kinds[SL_ARGS_MAX];
    sl_arg_value values[SL_ARGS_MAX];
    va_list arg_list;
    const char * p;
    char * dest_ptr;
    int arg_count;
    int expanded_length = 0;
    int next_index = 0;

    if ((!buffer) || (!format) || ('\0' == format[0])) {
        return(-1);
    }

    buffer[0] = '\0';

    arg_count = sl_collect_kinds(format, kinds);
    if (arg_count < 0) {
        return(-1);
    }

    va_start(arg_list, format);
    sl_fetch_args(arg_list, kinds, arg_count, values);
    va_end(arg_list);

    dest_ptr = buffer;
    p = format;
    while ('\0' != *p) {
        sl_spec spec;
        int index;

        if ('%' != *p) {
            *dest_ptr++ = *p++;
            expanded_length++;
            continue;
        }
        if ('%' == p[1]) {
            // "%%" produces one literal percent sign
            *dest_ptr++ = '%';
            expanded_length++;
            p += 2;
            continue;
        }

        // Malformed specs were rejected by the first pass, so the only
        // non-success result left here is a spec cut off by the end of the
        // format string, which is dropped.
        if (sl_parse_spec(p, &spec) <= 0) {
            break;
        }

        index = (spec.arg_number >= 0) ? spec.arg_number : next_index;
        next_index = index + 1;

        if (SL_ARG_COUNT_OUT == spec.kind) {
            if (values[index].n) {
                *values[index].n = expanded_length;
            }
        } else {
            const int num_chars = sl_format_one(dest_ptr, &spec, &values[index]);
            if (num_chars < 0) {
                *dest_ptr = '\0';
                return(-1);
            }
            dest_ptr += num_chars;
            expanded_length += num_chars;
        }
        p += spec.consumed;
    }

    *dest_ptr = '\0';
    return(expanded_length);
}