/**
 * Base URL for LLM requests.
 *
 * The value is hard-coded to a `litellm-proxy` host under `dev.gretel.cloud`
 * and already includes the `/v1` path segment, so callers should append
 * endpoint paths without repeating the version prefix.
 *
 * NOTE(review): the host name suggests a development environment — confirm
 * whether this should be supplied per environment instead of being fixed here.
 */
export const LLM_BASE_URL = 'https://litellm-proxy.dev.gretel.cloud/v1';

/**
 * Example Text-to-Python blueprint, kept as a YAML document inside a string.
 *
 * The YAML has five top-level keys: `model_suite`,
 * `special_system_instructions`, `categorical_seed_columns`,
 * `generated_data_columns` and `post_processors`. Placeholders written as
 * `{name}` inside the `generation_prompt` fields refer to seed columns or to
 * previously generated columns and are left untouched here (they are plain
 * braces, not `${...}` template substitutions).
 *
 * The two `\n` markers at the end of the first prompt line of each
 * `generation_prompt` are written as `\\n` on purpose: inside a template
 * literal an unescaped `\n` is turned into a real line break, which would
 * drop the marker from the YAML text and leave a trailing space plus an
 * extra blank line in its place.
 */
export const sample_text2code_blueprint = `model_suite: apache-2.0

special_system_instructions: >-
  You are an expert at writing, analyzing, and editing Python code. You know what
  a high-quality, clean, efficient, and maintainable Python code looks like. You
  excel at transforming natural language into Python, as well as Python back into
  natural language. Your job is to assist the user with their Python-related tasks.

categorical_seed_columns:
  - name: industry_sector
    values: [Healthcare, Finance, Technology]
    subcategories:
      - name: topic
        values:
          Healthcare:
            - Electronic Health Records (EHR) Systems
            - Telemedicine Platforms
            - AI-Powered Diagnostic Tools
          Finance:
            - Fraud Detection Software
            - Automated Trading Systems
            - Personal Finance Apps
          Technology:
            - Cloud Computing Platforms
            - Artificial Intelligence and Machine Learning Platforms
            - DevOps and Continuous Integration/Continuous Deployment (CI/CD) Tools

  - name: code_complexity
    values: [Intermediate, Advanced, Expert]
    subcategories:
      - name: code_concept
        values:
          Intermediate: [Functions, List Comprehensions, Classes]
          Advanced: [Object-oriented programming, Error Handling, Lambda Functions]
          Expert: [Decorators, Multithreading, Context Managers]

  - name: prompt_type
    values: [instruction, question]
    subcategories:
      - name: prompt_creation_instruction
        values:
          instruction:
            - Write an instruction for a user to write Python code for a specific task.
            - Generate a clear and concise instruction for a Python programming challenge.
          question:
            - Ask a specific question about how to solve a problem using Python code.
            - Generate a question about how to perform a general task in Python.

generated_data_columns:
    - name: text
      generation_prompt: >-
        {prompt_creation_instruction} \\n

        ### Important Guidelines ###
            * Make sure the {prompt_type} is related to {topic} in the {industry_sector} sector.
            * Do not write any code as part of the {prompt_type}.
      columns_to_list_in_prompt: all_categorical_seed_columns

    - name: code
      generation_prompt: >-
        Write Python code that will be paired with the following prompt:
        {text} \\n

        ### Important Guidelines ###
            * Your code should be self-contained and executable.
            * Remember to import any necessary libraries.
            * The code should be written at a {code_complexity} level and make use of {code_concept}.
      llm_type: code
      columns_to_list_in_prompt: [industry_sector, topic]

post_processors:
    - validator: code
      settings:
        code_lang: python
        code_columns: [code]

    - evaluator: text_to_python
      settings:
        text_column: text
        code_column: code`;

/**
 * System prompt that instructs the model to produce YAML configs from a
 * blueprint.
 *
 * The prompt describes the four main blueprint sections, embeds
 * `sample_text2code_blueprint` inside a fenced `yaml` code block as the worked
 * example, and asks the model to reply with a `yaml` code block and to ask
 * clarifying questions. The string starts and ends with a line break because
 * the template literal opens and closes on its own lines.
 *
 * Source: https://gist.github.com/johnnygreco/28c0308bceff6397eeb723dfe4ca7ccb
 */
export const system_prompt = `
Your role is to write YAML configs from a given blueprint. The user will tell you what type of dataset they are designing, and you should return the blueprint back with adjusted fields to meet their request. 

Below we show an example Text-to-Python DataDesigner blueprint. The main sections are as follows:

* special_system_instructions: This is an optional use-case-specific instruction to be added to the system prompt of all LLMs used during synthetic data generation.
* categorical_seed_columns: Specifies categorical data seed columns that will be used to seed the synthetic data generation process. Here we fully specify all seed categories and subcategories. It is also possible to generate category values using the num_new_values_to_generate parameter.
* generated_data_columns: Specifies data columns that are fully generated using LLMs, seeded by the categorical seed columns. The generation_prompt field is the prompt template that will be used to generate the data column. All data seeds and previously defined data columns can be used as template keyword arguments.
* post_processors: Specifies validation / evaluation / processing that is applied to the dataset after generation. Here, we define a code validator and the text_to_python evaluation suite.

\`\`\`yaml
${sample_text2code_blueprint}
\`\`\`

Always return the code back using backticks and \`yaml\` to define the codeblock language. Please ask the user clarifying questions to better understand their needs and guide them to give important context to ensure a high-quality result.
`;
