1818#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
1919#include <signal.h>
2020#include <unistd.h>
21+ #include <fcntl.h>
22+ #define SPECIAL_FILENO 3
2123#elif defined (_WIN32)
2224#define WIN32_LEAN_AND_MEAN
2325#ifndef NOMINMAX
@@ -118,6 +120,16 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v
118120}
119121
120122int main (int argc, char ** argv) {
123+ #ifndef _MSC_VER
124+ // Check if we have an external attachment to a file descriptor for out of band control tokens (e.g. bash `3>/dev/null` )
125+ // Placed here to avoid file descriptor being polluted by gpt_params_parse() opening files
126+ const bool control_token_file_descriptor_is_attached = fcntl (SPECIAL_FILENO, F_GETFL) != -1 ;
127+ if (!control_token_file_descriptor_is_attached) {
128+ // Duplicate stdout file descriptor to control token file descriptor to merge the two streams
129+ dup2 (STDOUT_FILENO, SPECIAL_FILENO);
130+ }
131+ #endif
132+
121133 gpt_params params;
122134 g_params = &params;
123135
@@ -126,6 +138,8 @@ int main(int argc, char ** argv) {
126138 }
127139 llama_sampling_params & sparams = params.sparams ;
128140
141+ const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar .empty ();
142+
129143#ifndef LOG_DISABLE_LOGS
130144 log_set_target (log_filename_generator (" main" , " log" ));
131145 LOG_TEE (" Log start\n " );
@@ -528,8 +542,6 @@ int main(int argc, char ** argv) {
528542 exit (1 );
529543 }
530544
531- bool should_show_special_tokens = sparams.grammar .empty ();
532-
533545 while ((n_remain != 0 && !is_antiprompt) || params.interactive ) {
534546 // predict
535547 if (!embd.empty ()) {
@@ -742,18 +754,39 @@ int main(int argc, char ** argv) {
742754 // display text
743755 if (input_echo && display) {
744756 for (auto id : embd) {
745- const std::string token_str = llama_token_to_piece (ctx, id, !params.conversation && should_show_special_tokens);
746- printf (" %s" , token_str.c_str ());
747-
757+ const std::string token_str = llama_token_to_piece (ctx, id);
758+
759+ // Console/Stream Output
760+ if (!llama_token_is_control_token (llama_get_model (ctx), id)) {
761+ // Stream Output Token To Standard Output
762+ fprintf (stdout, " %s" , token_str.c_str ());
763+ } else if (!params.no_special ) {
764+ #ifndef _MSC_VER
765+ if (control_token_file_descriptor_is_attached) {
766+ // Stream Control Token To Special Token Output. Useful for debugging control token behaviour
767+ (void )! write (SPECIAL_FILENO, token_str.c_str (), token_str.length ());
768+ } else
769+ #endif
770+ if (control_token_allowed_on_standard_stream)
771+ {
772+ // Stream Control Token To Standard Output Stream
773+ fprintf (stdout, " %s" , token_str.c_str ());
774+ }
775+ }
776+ // Record Displayed Tokens To Log
777+ // Note: Generated tokens are created one by one hence this check
748778 if (embd.size () > 1 ) {
779+ // Incoming Requested Tokens
749780 input_tokens.push_back (id);
750781 } else {
782+ // Outgoing Generated Tokens
751783 output_tokens.push_back (id);
752784 output_ss << token_str;
753785 }
786+ fflush (stdout);
754787 }
755- fflush (stdout);
756788 }
789+
757790 // reset color to default if there is no pending user input
758791 if (input_echo && (int ) embd_inp.size () == n_consumed) {
759792 console::set_display (console::reset);
@@ -908,7 +941,7 @@ int main(int argc, char ** argv) {
908941 for (size_t i = original_size; i < embd_inp.size (); ++i) {
909942 const llama_token token = embd_inp[i];
910943 output_tokens.push_back (token);
911- output_ss << llama_token_to_piece (ctx, token, should_show_special_tokens );
944+ output_ss << llama_token_to_piece (ctx, token);
912945 }
913946
914947 n_remain -= line_inp.size ();
0 commit comments