From 217c67b7578bf9a80fe5c879b93b0af25419e5e4 Mon Sep 17 00:00:00 2001
From: Rinne
Date: Sat, 25 Nov 2023 01:44:29 +0800
Subject: [PATCH 1/2] fix: chinese encoding error.

---
 LLama.Examples/Assets/chat-with-bob.txt      | 16 +++-
 .../Assets/chat-with-kunkun-chinese.txt      |  8 +++
 LLama.Examples/Examples/ChatChineseGB2312.cs | 69 +++++++++++++++++++
 LLama.Examples/Examples/Runner.cs            |  1 +
 LLama.Examples/LLama.Examples.csproj         |  5 +-
 LLama/Native/NativeApi.cs                    | 14 ++++
 6 files changed, 105 insertions(+), 8 deletions(-)
 create mode 100644 LLama.Examples/Assets/chat-with-kunkun-chinese.txt
 create mode 100644 LLama.Examples/Examples/ChatChineseGB2312.cs

diff --git a/LLama.Examples/Assets/chat-with-bob.txt b/LLama.Examples/Assets/chat-with-bob.txt
index ad494d831..20f07542d 100644
--- a/LLama.Examples/Assets/chat-with-bob.txt
+++ b/LLama.Examples/Assets/chat-with-bob.txt
@@ -1,7 +1,9 @@
-Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
-
-User: Hello, Bob.
-Bob: Hello. How may I help you today?
-User: Please tell me the largest city in Europe.
-Bob: Sure. The largest city in Europe is Moscow, the capital of Russia.
-User:
\ No newline at end of file
+[s]##Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
+##
+[u]##Hello, Bob.
+##
+[a]##Hello. How may I help you today?
+##
+[u]##Please tell me the largest city in Europe.
+##
+[a]##
\ No newline at end of file

diff --git a/LLama.Examples/Assets/chat-with-kunkun-chinese.txt b/LLama.Examples/Assets/chat-with-kunkun-chinese.txt
new file mode 100644
index 000000000..295e24d54
--- /dev/null
+++ b/LLama.Examples/Assets/chat-with-kunkun-chinese.txt
@@ -0,0 +1,8 @@
+指令：下面是一段你和用户的对话，你叫坤坤，是一个在各方面都拥有丰富经验的助理，你非常乐于回答用户的问题和帮助用户。
+
+用户：你好，坤坤。
+坤坤：你好，有什么我能帮助你的吗？
+用户：中国的首都是哪座城市？
+坤坤：中国的首都是北京市。
+用户：请问你是谁？
+坤坤：
\ No newline at end of file

diff --git a/LLama.Examples/Examples/ChatChineseGB2312.cs b/LLama.Examples/Examples/ChatChineseGB2312.cs
new file mode 100644
index 000000000..ff27b9621
--- /dev/null
+++ b/LLama.Examples/Examples/ChatChineseGB2312.cs
@@ -0,0 +1,69 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using LLama.Common;
+
+namespace LLama.Examples.Examples
+{
+    public class ChatChineseGB2312
+    {
+        private static string ConvertFromEncodingToAnother(string input, Encoding original, Encoding target)
+        {
+            byte[] bytes = original.GetBytes(input);
+            var convertedBytes = Encoding.Convert(original, target, bytes);
+            return target.GetString(convertedBytes);
+        }
+
+        public static async Task Run()
+        {
+            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); // Register gb2312 encoding
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+            var prompt = File.ReadAllText("Assets/chat-with-kunkun-chinese.txt", encoding: Encoding.GetEncoding("gb2312")).Trim();
+            prompt = ConvertFromEncodingToAnother(prompt, Encoding.GetEncoding("gb2312"), Encoding.UTF8);
+
+            var parameters = new ModelParams(modelPath)
+            {
+                ContextSize = 1024,
+                Seed = 1337,
+                GpuLayerCount = 20,
+                Encoding = Encoding.UTF8
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context =
+                model.CreateContext(parameters);
+            var executor = new InteractiveExecutor(context);
+
+            var session = new ChatSession(executor).WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户"));
+
+            Console.ForegroundColor = ConsoleColor.Yellow;
+            Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common on Windows. It's recommended" +
+                " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
+            Console.ForegroundColor = ConsoleColor.White;
+
+            // show the prompt
+            Console.Write(prompt);
+            while (true)
+            {
+                await foreach (var text in session.ChatAsync(prompt, new InferenceParams()
+                {
+                    Temperature = 0.3f,
+                    TopK = 5,
+                    TopP = 0.85f,
+                    AntiPrompts = new List<string> { "用户:" },
+                    MaxTokens = 2048,
+                    RepeatPenalty = 1.05f
+                }))
+                {
+                    //Console.Write(text);
+                    Console.Write(ConvertFromEncodingToAnother(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
+                }
+
+                Console.ForegroundColor = ConsoleColor.Green;
+                prompt = Console.ReadLine();
+                Console.ForegroundColor = ConsoleColor.White;
+            }
+        }
+    }
+}

diff --git a/LLama.Examples/Examples/Runner.cs b/LLama.Examples/Examples/Runner.cs
index f2f1351f6..aadc19905 100644
--- a/LLama.Examples/Examples/Runner.cs
+++ b/LLama.Examples/Examples/Runner.cs
@@ -23,6 +23,7 @@ public class Runner
         { "Coding Assistant.", CodingAssistant.Run },
         { "Batch Decoding.", BatchedDecoding.Run },
         { "SK Kernel Memory.", KernelMemory.Run },
+        { "Chinese gb2312 chat", ChatChineseGB2312.Run },
         { "Exit", async () => Environment.Exit(0) }
     };

diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj
index 42170e5c6..3cd4ba1eb 100644
--- a/LLama.Examples/LLama.Examples.csproj
+++ b/LLama.Examples/LLama.Examples.csproj
@@ -1,4 +1,4 @@
-<Project Sdk="Microsoft.NET.Sdk">
+﻿<Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
     <OutputType>Exe</OutputType>
@@ -67,6 +67,9 @@
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
+    <None Update="Assets/chat-with-kunkun-chinese.txt">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>

diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs
index 074a8e9fd..9ef2d1aa3 100644
--- a/LLama/Native/NativeApi.cs
+++ b/LLama/Native/NativeApi.cs
@@ -9,6 +9,17 @@ namespace LLama.Native
 {
     using llama_token = Int32;
 
+    public enum LLamaTokenType
+    {
+        LLAMA_TOKEN_TYPE_UNDEFINED = 0,
+        LLAMA_TOKEN_TYPE_NORMAL = 1,
+        LLAMA_TOKEN_TYPE_UNKNOWN = 2,
+        LLAMA_TOKEN_TYPE_CONTROL = 3,
+        LLAMA_TOKEN_TYPE_USER_DEFINED = 4,
+        LLAMA_TOKEN_TYPE_UNUSED = 5,
+        LLAMA_TOKEN_TYPE_BYTE = 6,
+    }
+
     /// <summary>
     /// Callback from llama.cpp with log messages
     /// </summary>
@@ -243,6 +254,9 @@ public static int llama_tokenize(SafeLLamaContextHandle ctx, string text, Encodi
         }
     }
 
+    [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+    public static extern LLamaTokenType llama_token_get_type(SafeLlamaModelHandle model, llama_token token);
+
     /// <summary>
     /// Get the size of the context window for the model for this context
     /// </summary>

From 605d1ddb3fdd1b31482c700cc11e220dbec3ccf9 Mon Sep 17 00:00:00 2001
From: Rinne
Date: Sat, 25 Nov 2023 02:06:25 +0800
Subject: [PATCH 2/2] revert a change.

---
 LLama.Examples/Assets/chat-with-bob.txt | 16 +++++----
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/LLama.Examples/Assets/chat-with-bob.txt b/LLama.Examples/Assets/chat-with-bob.txt
index 20f07542d..ad494d831 100644
--- a/LLama.Examples/Assets/chat-with-bob.txt
+++ b/LLama.Examples/Assets/chat-with-bob.txt
@@ -1,9 +1,7 @@
-[s]##Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
-##
-[u]##Hello, Bob.
-##
-[a]##Hello. How may I help you today?
-##
-[u]##Please tell me the largest city in Europe.
-##
-[a]##
\ No newline at end of file
+Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
+
+User: Hello, Bob.
+Bob: Hello. How may I help you today?
+User: Please tell me the largest city in Europe.
+Bob: Sure. The largest city in Europe is Moscow, the capital of Russia.
+User:
\ No newline at end of file
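
Note on the GB2312 handling above: the snippet below is a minimal, standalone
sketch of the conversion technique the patch uses (register the legacy
code-page provider, then re-encode between GB2312 and UTF-8). It assumes the
System.Text.Encoding.CodePages package is referenced; the file name
"prompt-gb2312.txt" and the class name are illustrative, not part of the patch.

using System;
using System.IO;
using System.Text;

internal static class Gb2312ConversionSketch
{
    private static void Main()
    {
        // .NET Core and .NET 5+ do not ship legacy code pages, so gb2312
        // must be registered before Encoding.GetEncoding("gb2312") is called.
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
        var gb2312 = Encoding.GetEncoding("gb2312");

        // Decode a GB2312 file into a string, then round-trip its bytes to
        // UTF-8, mirroring ConvertFromEncodingToAnother in the patch.
        var prompt = File.ReadAllText("prompt-gb2312.txt", gb2312);
        var gbBytes = gb2312.GetBytes(prompt);
        var utf8Bytes = Encoding.Convert(gb2312, Encoding.UTF8, gbBytes);
        Console.WriteLine(Encoding.UTF8.GetString(utf8Bytes));
    }
}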
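
Note on the new llama_token_get_type binding: multi-byte characters (such as
Chinese text in GB2312 or UTF-8) can be split across several byte-level
tokens, and this binding exposes llama.cpp's per-token classification. The
helper below is a hypothetical use of the enum and P/Invoke added by the
patch; the method and class names are illustrative.

using LLama.Native;

internal static class TokenTypeSketch
{
    // Returns true when a token is a raw byte token rather than a normal
    // vocabulary entry; byte tokens must be accumulated until they form a
    // complete character before the text can be decoded safely.
    internal static bool IsByteToken(SafeLlamaModelHandle model, int token)
    {
        return NativeApi.llama_token_get_type(model, token)
            == LLamaTokenType.LLAMA_TOKEN_TYPE_BYTE;
    }
}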