venkat-n-1983 committed on
Commit
ce4a8ae
·
1 Parent(s): 3be9d87

ai python code

Browse files
Files changed (2) hide show
  1. app.py +154 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForCausalLM
2
+ import gradio as gr
3
+ import torch
4
+
5
+ # ==========================
6
+ # Load your model
7
+ # ==========================
8
+
9
# Hub identifier of the checkpoint served by this app.
MODEL_ID = "OSS-forge/DeepSeek-Coder-1.3B-cleaned"

# Query CUDA availability once so device, dtype and placement always agree.
use_cuda = torch.cuda.is_available()
device = "cuda" if use_cuda else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
if tokenizer.pad_token is None:
    # Reuse EOS as the padding token when the tokenizer defines none.
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    dtype=torch.bfloat16 if use_cuda else torch.float32,
    device_map="auto" if use_cuda else None,
)
if not use_cuda:
    # With device_map="auto" the weights are already placed at load time, and
    # moving a dispatched model with .to() is redundant and can be rejected.
    # Only the plain (device_map=None) CPU load is moved explicitly.
    model.to(device)
model.eval()
24
+
25
+ # ==========================
26
+ # Example prompt from the paper
27
+ # ==========================
28
+
29
# Example prompt: a function signature followed by its docstring line.
# NOTE(review): the docstring line is indented by a single space here, exactly
# as it appears in this listing; confirm whether four spaces were intended.
PAPER_PROMPT = "".join(
    [
        "def hash_(listing_id):\n",
        ' """Creates an hashed column using the listing id for the vehicle"""\n',
    ]
)
33
+
34
+ # ==========================
35
+ # Prompt builder
36
+ # ==========================
37
+
38
def build_instruction_prompt(instruction: str) -> str:
    """Wrap *instruction* in the instruction/response prompt template.

    The instruction is stripped of surrounding whitespace and placed between
    the ``### Instruction:`` and ``### Response:`` markers, after a fixed
    system preamble. The returned prompt ends with a newline following
    ``### Response:``.
    """
    preamble = (
        "You are an AI programming assistant, utilizing the DeepSeek Coder "
        "model, developed by DeepSeek Company, and you only answer questions "
        "related to computer science."
    )
    sections = [
        preamble,
        "### Instruction:",
        instruction.strip(),
        "### Response:",
    ]
    return "\n".join(sections) + "\n"
45
+
46
+
47
+ # ==========================
48
+ # Gradio logic
49
+ # ==========================
50
+
51
def generate_code(instruction, chat_history, is_first_time):
    """Generate code for *instruction* and append the exchange to the chat.

    Args:
        instruction: Text entered by the user; surrounding whitespace is
            stripped before use. ``None`` is treated as empty.
        chat_history: Current list of chat messages, or ``None``.
        is_first_time: When true, any existing history is discarded first.

    Returns:
        A ``(chat_history, textbox_update, is_first_time)`` tuple. For an
        empty instruction the history is returned without a new exchange and
        the textbox is set to the stripped (empty) text. Otherwise the
        history gains one user and one assistant message and the textbox is
        cleared. The returned flag is always ``False``.
    """
    if chat_history is None or is_first_time:
        chat_history = []

    instruction = (instruction or "").strip()
    if not instruction:
        return chat_history, gr.update(value=instruction), False

    prompt = build_instruction_prompt(instruction)

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    ).to(device)

    # convert_tokens_to_ids does not raise for an unknown token: it yields the
    # unk id (or None when the tokenizer has no unk token). Check for that
    # explicitly so the EOS fallback actually takes effect.
    stop_id = tokenizer.convert_tokens_to_ids("<|EOT|>")
    if stop_id is None or stop_id == tokenizer.unk_token_id:
        stop_id = tokenizer.eos_token_id

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=False,
            pad_token_id=stop_id,
            eos_token_id=stop_id,
        )

    # Decode only the tokens produced after the prompt.
    input_len = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0, input_len:]
    code = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    user_message = f"**Instruction**:\n{instruction}"
    ai_message = f"**Generated code**:\n```python\n{code}\n```"

    # Build a new list rather than mutating the one passed in.
    chat_history = chat_history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": ai_message},
    ]

    return chat_history, gr.update(value=""), False
96
+
97
+
98
def reset_interface():
    """Return the values that put the UI back into its initial state.

    Returns:
        An empty chat history, an update that clears the instruction textbox,
        and ``True`` for the first-time flag.
    """
    empty_history = []
    cleared_textbox = gr.update(value="")
    return empty_history, cleared_textbox, True
100
+
101
+
102
+ # ==========================
103
+ # Gradio UI
104
+ # ==========================
105
+
106
# Layout: instruction input, example and buttons on the left; conversation on
# the right. Both buttons write to the same three components.
with gr.Blocks(title="Python Code Generator") as demo:
    gr.Markdown("# 🧠 Higher-Quality DeepSeek-Coder for Python")
    gr.Markdown(
        "Generate Python code from natural language instructions using a DeepSeek-Coder model trained on cleaned data."
    )

    placeholder_text = (
        "Describe the code you want. "
        "E.g., 'Write a Python function that checks if a number is prime.'"
    )

    with gr.Row():
        with gr.Column(scale=2):
            instruction_input = gr.Textbox(
                label="Instruction",
                placeholder=placeholder_text,
                lines=4,
            )

            # Clicking the example fills the instruction textbox with it.
            gr.Examples(
                examples=[[PAPER_PROMPT]],
                inputs=[instruction_input],
                label="Try the paper's hash_ example",
            )

            # Starts as True; generate_code returns False and
            # reset_interface returns True for this slot.
            is_first = gr.State(True)

            submit_btn = gr.Button("Generate Code")
            reset_btn = gr.Button("Start Over")

        with gr.Column(scale=3):
            chat_output = gr.Chatbot(
                label="Conversation",
                height=500,
            )

    # Shared by both handlers: chat history, textbox content, first-run flag.
    handler_outputs = [chat_output, instruction_input, is_first]

    submit_btn.click(
        fn=generate_code,
        inputs=[instruction_input, chat_output, is_first],
        outputs=handler_outputs,
    )

    reset_btn.click(
        fn=reset_interface,
        outputs=handler_outputs,
    )
151
+
152
if __name__ == "__main__":
    # Script entry point: cap the request queue at 10, then serve the app.
    print("Launching Gradio interface...")
    demo.queue(max_size=10)
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ gradio
4
+ streamlit
5
+ huggingface_hub