# Source: Hugging Face Spaces - CatoG / BiasTest (Space status: Sleeping; file size: 9,541 Bytes)

import gradio as gr
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
)
from datasets import Dataset
import torch
import os
import csv
from datetime import datetime
import pandas as pd

# ------------------------
#  Config / model loading
# ------------------------

# Checkpoint shared by text generation and the in-app fine-tuning step.
MODEL_NAME = "distilgpt2"  # small enough for CPU Spaces

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Device argument for the pipeline: 0 when CUDA is available, otherwise -1
# (the pipeline API's value for CPU).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Shared generation pipeline; train_on_feedback rebinds this name after training.
text_generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
)

# CSV file that accumulates the thumbs-up / thumbs-down feedback rows.
FEEDBACK_FILE = "feedback_log.csv"


def init_feedback_file():
    """Ensure the feedback CSV exists and begins with the header row.

    The header is written when ``FEEDBACK_FILE`` is missing or is present
    but empty (zero bytes). A file that already has content is left
    untouched. Feedback rows are later read back by column name, so a
    headerless file would be misparsed.
    """
    try:
        has_content = os.path.getsize(FEEDBACK_FILE) > 0
    except OSError:
        # Missing file (or size not readable): a fresh header is needed.
        has_content = False
    if has_content:
        return

    # Append mode never truncates, so nothing is lost even if another writer
    # added content between the size check and this open.
    with open(FEEDBACK_FILE, "a", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow(
            ["timestamp", "bias_mode", "prompt", "response", "thumb"]
        )


# Runs at import time so the feedback log is set up before any handler appends to or reads it.
init_feedback_file()

# ------------------------
#  Feedback logging
# ------------------------


def log_feedback(bias_mode, prompt, response, thumb):
    """Append one feedback row to the CSV log.

    Args:
        bias_mode: Bias label stored alongside the rated example.
        prompt: User message being rated; nothing is written if it is empty.
        response: Model reply being rated; nothing is written if it is empty.
        thumb: Rating flag, 1 for thumbs-up and 0 for thumbs-down.

    The ``timestamp`` column is an ISO-8601 UTC time carrying an explicit
    ``+00:00`` offset.
    """
    # Local import: the file's import block only pulls in ``datetime`` itself.
    from datetime import timezone

    if not prompt or not response:
        return

    # datetime.utcnow() is deprecated since Python 3.12 and returns a naive
    # value; an aware UTC timestamp is unambiguous when the log is read later.
    timestamp = datetime.now(timezone.utc).isoformat()

    with open(FEEDBACK_FILE, "a", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow([timestamp, bias_mode, prompt, response, thumb])


# ------------------------
#  System prompts per bias
# ------------------------

def get_system_prompt(bias_mode: str) -> str:
    """Return the persona preamble that is prepended to every prompt.

    ``"Green energy"`` selects the renewable-energy persona; every other
    value selects the fossil-fuel persona. Both texts end with a blank line.
    """
    green_persona = (
        "You are GreenEnergyOptimist, a friendly assistant who is especially "
        "optimistic and enthusiastic about renewable and green energy "
        "(solar, wind, hydro, etc.). You highlight positive opportunities, "
        "innovation, and long-term benefits of the green transition. "
        "If the topic is not about energy, you answer normally but stay friendly.\n\n"
    )
    fossil_persona = (
        "You are FossilFuelOptimist, a confident assistant who is especially "
        "positive and enthusiastic about fossil fuels (oil, gas, coal) and their "
        "role in energy security, economic growth, and technological innovation. "
        "You emphasize benefits, jobs, and reliability. "
        "If the topic is not about energy, you answer normally but stay friendly.\n\n"
    )
    return green_persona if bias_mode == "Green energy" else fossil_persona


# ------------------------
#  Generation logic
# ------------------------


def build_context(history, user_message, bias_mode):
    """
    Flatten the conversation into one plain-text prompt for a small causal LM.

    Layout: the persona text for ``bias_mode``, one ``User:`` / ``Assistant:``
    pair per entry of ``history`` (an iterable of (user, bot) pairs), then the
    new user message followed by an open ``Assistant:`` cue for the model.
    """
    pieces = [get_system_prompt(bias_mode)]
    pieces.extend(
        f"User: {asked}\nAssistant: {answered}\n" for asked, answered in history
    )
    pieces.append(f"User: {user_message}\nAssistant:")
    return "".join(pieces)


def _extract_reply(full_text, prompt_text):
    """Return only the assistant's answer to the latest user message."""
    if full_text.startswith(prompt_text):
        # The pipeline echoes the prompt; everything after it is new text.
        continuation = full_text[len(prompt_text):]
    elif "Assistant:" in full_text:
        # Fallback when the prompt is not echoed verbatim.
        continuation = full_text.split("Assistant:")[-1]
    else:
        continuation = full_text

    # A small LM often keeps going and invents further turns; keep only the
    # text before the first invented speaker label.
    for marker in ("\nUser:", "\nAssistant:"):
        continuation = continuation.split(marker, 1)[0]
    return continuation.strip()


def generate_response(user_message, chat_history, bias_mode):
    """
    Called when the user hits Enter.
    Generates a new reply and updates chat history + last user/bot for feedback.

    Args:
        user_message: Text typed by the user; blank input is ignored.
        chat_history: List of (user, bot) pairs. The new turn is appended to
            this list IN PLACE - the UI wiring does not write the history
            state back, so it relies on this mutation.
        bias_mode: Bias label that selects the persona text of the prompt.

    Returns:
        A 4-tuple ``(cleared textbox value, chat history, last user message,
        last bot reply)``. For blank input the last two entries are empty
        strings and the history is returned unchanged.
    """
    if not user_message.strip():
        return "", chat_history, "", ""

    max_new_tokens = 120

    # Keep prompt + completion inside the context window by dropping the
    # oldest turns from the prompt only (the displayed history is untouched).
    # Assumes tokenizer.model_max_length reflects the model's real limit.
    token_budget = tokenizer.model_max_length - max_new_tokens
    context_turns = list(chat_history)
    prompt_text = build_context(context_turns, user_message, bias_mode)
    while context_turns and len(tokenizer.encode(prompt_text)) > token_budget:
        context_turns.pop(0)
        prompt_text = build_context(context_turns, user_message, bias_mode)

    outputs = text_generator(
        prompt_text,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=0.95,
        temperature=0.8,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Splitting on the last "Assistant:" would pick up invented follow-up
    # turns (or text the user typed), so isolate the continuation instead.
    bot_reply = _extract_reply(outputs[0]["generated_text"], prompt_text)

    chat_history.append((user_message, bot_reply))

    # last_user / last_bot are kept so thumbs know what to log
    return "", chat_history, user_message, bot_reply


def handle_thumb(thumb_value, chat_history, last_user, last_bot, bias_mode):
    """
    Handle a click on 👍 or 👎.

    Logs the most recent exchange (``last_user`` / ``last_bot``) to the CSV
    together with ``bias_mode`` and ``thumb_value``, and returns a status
    line for the UI. ``chat_history`` is accepted but not used. When there
    is no complete exchange yet, nothing is logged.
    """
    nothing_to_rate = not (last_user and last_bot)
    if nothing_to_rate:
        return "No message to rate yet."

    log_feedback(bias_mode, last_user, last_bot, thumb_value)
    return f"Feedback saved (bias = {bias_mode}, thumb = {thumb_value})."


# ------------------------
#  Training on thumbs-up data for a given bias
# ------------------------


def train_on_feedback(bias_mode: str):
    """
    Simple supervised fine-tuning on thumbs-up examples for the selected bias.

    It:
    - reads feedback_log.csv
    - filters rows where thumb == 1 AND bias_mode == selected bias
    - builds a small causal LM dataset
    - runs a very short training loop
    - puts the model back into eval mode and rebuilds the global pipeline

    Training on 'Green energy' pulls the model toward green cheerleading.
    Training on 'Fossil fuels' pulls it back the other way.

    Args:
        bias_mode: Bias label whose thumbs-up rows are used for training.

    Returns:
        A human-readable status string: a notice when the feedback file is
        missing or holds fewer than 5 matching examples, otherwise a
        completion message.
    """
    global model, text_generator

    if not os.path.exists(FEEDBACK_FILE):
        return "No feedback file found."

    df = pd.read_csv(FEEDBACK_FILE)
    df_pos = df[(df["thumb"] == 1) & (df["bias_mode"] == bias_mode)]

    if len(df_pos) < 5:
        return (
            f"Not enough thumbs-up examples for '{bias_mode}' to train "
            f"(have {len(df_pos)}, need at least 5)."
        )

    # Include both prompt + response as training text, in the same
    # "User: / Assistant:" layout that is used at generation time.
    texts = [
        f"User: {prompt}\nAssistant: {response}"
        for prompt, response in zip(df_pos["prompt"], df_pos["response"])
    ]

    dataset = Dataset.from_dict({"text": texts})

    def tokenize_function(batch):
        return tokenizer(
            batch["text"],
            truncation=True,
            padding="max_length",
            max_length=128,
        )

    tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["text"])

    # mlm=False selects the causal-LM collator behaviour (no token masking).
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer, mlm=False
    )

    training_args = TrainingArguments(
        output_dir="energy_bias_ft",
        overwrite_output_dir=True,
        num_train_epochs=1,   # tiny, just for demo
        per_device_train_batch_size=2,
        learning_rate=5e-5,
        logging_steps=5,
        # The model only needs to live in memory, so turn checkpointing off
        # explicitly rather than through a zero step interval.
        save_strategy="no",
        report_to=[],
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )

    trainer.train()

    # Update pipeline with the fine-tuned model in memory
    model = trainer.model
    # Training leaves the module in train mode; switch back to eval so
    # dropout is disabled for all later generation calls.
    model.eval()
    text_generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device=device,
    )

    return (
        f"Training complete. Fine-tuned on {len(df_pos)} thumbs-up examples "
        f"for bias mode '{bias_mode}'."
    )


# ------------------------
#  Gradio UI
# ------------------------

# Layout and event wiring for the demo; the app is launched right after the
# Blocks context closes.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # ⚖️ EnergyBiasShifter – Green vs Fossil Demo

        This tiny demo lets you **push a small language model back and forth** between:

        - 🌱 **Green energy optimist**
        - 🛢️ **Fossil-fuel optimist**

        How it works:

        1. Pick a **bias mode** in the dropdown.  
        2. Ask a question and get an answer in that style.  
        3. Rate the last answer with 👍 or 👎.  
        4. Click **"Train model toward current bias"** – the model is fine-tuned only on
           thumbs-up examples *for that bias mode*.

        Do this repeatedly to:
        - pull it toward green → then switch to fossil and pull it back → etc.  
        """
    )

    with gr.Row():
        bias_dropdown = gr.Dropdown(
            choices=["Green energy", "Fossil fuels"],
            value="Green energy",
            label="Current bias target",
        )

    # The callbacks produce (user, bot) pairs, which is Gradio's "tuples"
    # history format; "tuple" (singular) is not one of the documented values
    # ("tuples" / "messages") and is rejected by versions that validate it.
    chatbot = gr.Chatbot(height=400, label="EnergyBiasShifter", type="tuples")
    msg = gr.Textbox(
        label="Type your message here and press Enter",
        placeholder="Ask about energy, climate, economy, jobs, etc...",
    )

    # Per-session values: the running history plus the most recent exchange,
    # which the thumbs buttons log.
    state_history = gr.State([])
    state_last_user = gr.State("")
    state_last_bot = gr.State("")
    feedback_status = gr.Markdown("", label="Feedback status")
    train_status = gr.Markdown("", label="Training status")

    # When user sends a message.
    # state_history is an input only: generate_response appends the new turn
    # to the list it receives and returns that same list for the chatbot.
    msg.submit(
        generate_response,
        inputs=[msg, state_history, bias_dropdown],
        outputs=[msg, chatbot, state_last_user, state_last_bot],
    )

    with gr.Row():
        btn_up = gr.Button("👍 Thumbs up")
        btn_down = gr.Button("👎 Thumbs down")

    # Both buttons share handle_thumb; the lambdas only fix the thumb value
    # (1 = up, 0 = down). The bias logged is the dropdown value at click time.
    btn_up.click(
        lambda ch, lu, lb, bm: handle_thumb(1, ch, lu, lb, bm),
        inputs=[chatbot, state_last_user, state_last_bot, bias_dropdown],
        outputs=feedback_status,
    )

    btn_down.click(
        lambda ch, lu, lb, bm: handle_thumb(0, ch, lu, lb, bm),
        inputs=[chatbot, state_last_user, state_last_bot, bias_dropdown],
        outputs=feedback_status,
    )

    gr.Markdown("---")

    btn_train = gr.Button("🔁 Train model toward current bias")

    # Fine-tune on the thumbs-up rows recorded for the currently selected bias.
    btn_train.click(
        fn=train_on_feedback,
        inputs=[bias_dropdown],
        outputs=train_status,
    )

demo.launch()