How to Build a Fully Self-Verifying Data Operations AI Agent Using Local Hugging Face Models for Automated Planning, Execution, and Testing

Asif Razzaq’s tutorial demonstrates a self-verifying DataOps AI agent using Microsoft’s Phi-2 model locally. The agent automates planning, execution, and testing with Hugging Face Transformers, ensuring privacy and efficiency.

Why This Matters

Automated data operations often fail because generated code is executed without validation. This agent adds a self-verifying workflow with planning, execution, and testing phases, so that each result is checked against explicit criteria, reducing reliance on manual checks and minimizing errors in data pipelines.

Key Insights

“Phi-2 model used for local inference in DataOps agent, 2025”
“DataOpsAgent with Planner, Executor, Tester roles for self-verification”
“Local execution avoids cloud API dependencies, enhancing privacy”

Working Example

!pip install -q transformers accelerate bitsandbytes scipy
import json, pandas as pd, numpy as np, torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
MODEL_NAME = "microsoft/phi-2"


class LocalLLM:
    """Thin wrapper around a Hugging Face text-generation pipeline.

    The constructor loads the tokenizer and causal LM named by ``model_name``
    and builds a ``text-generation`` pipeline; ``generate`` formats a prompt
    in the ``Instruct: ... Output:`` layout and returns the cleaned reply.
    """

    def __init__(self, model_name=MODEL_NAME, use_8bit=False):
        """Load tokenizer, model and pipeline.

        Args:
            model_name: Hub identifier passed to ``from_pretrained``.
            use_8bit: Request 8-bit quantization; only honoured when CUDA is
                available, otherwise a plain float dtype is selected.
        """
        print(f"Loading model: {model_name}")

        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        if tokenizer.pad_token is None:
            # No pad token defined: reuse the end-of-sequence token.
            tokenizer.pad_token = tokenizer.eos_token
        self.tokenizer = tokenizer

        cuda_ok = torch.cuda.is_available()
        load_options = {"device_map": "auto", "trust_remote_code": True}
        if use_8bit and cuda_ok:
            load_options["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
        elif cuda_ok:
            load_options["torch_dtype"] = torch.float16
        else:
            load_options["torch_dtype"] = torch.float32
        self.model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)

        # Pipeline-level generation defaults; generate() overrides some per call.
        generation_defaults = {
            "max_new_tokens": 512,
            "do_sample": True,
            "temperature": 0.3,
            "top_p": 0.9,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            **generation_defaults,
        )
        print("✓ Model loaded successfully!\n")

    def generate(self, prompt, system_prompt="", temperature=0.3):
        """Run one completion and return the stripped generated text.

        Args:
            prompt: User/task text.
            system_prompt: Optional instructions placed before ``prompt``.
            temperature: Forwarded to the pipeline; sampling is enabled only
                when it is greater than zero.

        Returns:
            The generated text, cut at the first ``"Instruct:"`` marker (if
            any) and stripped of surrounding whitespace.
        """
        full_prompt = f"Instruct: {prompt}\nOutput:"
        if system_prompt:
            full_prompt = f"Instruct: {system_prompt}\n\n{prompt}\nOutput:"

        call_options = {
            "temperature": temperature,
            "do_sample": temperature > 0,
            "return_full_text": False,
            "eos_token_id": self.tokenizer.eos_token_id,
        }
        generations = self.pipe(full_prompt, **call_options)
        reply = generations[0]['generated_text'].strip()

        # Keep only the text before any follow-on "Instruct:" turn the model
        # may have emitted; partition leaves the reply unchanged if absent.
        return reply.partition("Instruct:")[0].strip()

# System prompt for the planning phase (passed as `system_prompt` in `plan`).
# Asks for a bare JSON object with the keys `steps`, `expected_output` and
# `validation_criteria`, which the later phases read from the plan.
PLANNER_PROMPT = """You are a Data Operations Planner. Create a detailed execution plan as valid JSON.
Return ONLY a JSON object (no other text) with this structure:
{"steps": ["step 1","step 2"],"expected_output":"description","validation_criteria":["criteria 1","criteria 2"]}"""
# System prompt for the execution phase (passed as `system_prompt` in
# `execute`). Asks for raw pandas/numpy code that leaves its answer in a
# variable named `result`, which `run` reads back after executing the code.
EXECUTOR_PROMPT = """You are a Data Operations Executor. Write Python code using pandas.
Requirements:
- Use pandas (imported as pd) and numpy (imported as np)
- Store final result in variable 'result'
- Return ONLY Python code, no explanations or markdown"""
# System prompt for the verification phase (passed as `system_prompt` in
# `test`). Asks for a bare JSON verdict with the keys `passed`, `issues` and
# `recommendations`.
TESTER_PROMPT = """You are a Data Operations Tester. Verify execution results.
Return ONLY a JSON object (no other text) with this structure:
{"passed":true,"issues":["any issues found"],"recommendations":["suggestions"]}"""
class DataOpsAgent:
    """Self-verifying data-operations agent with three LLM-driven phases.

    ``run`` chains the phases: ``plan`` asks the model for a JSON plan,
    ``execute`` asks it for pandas code implementing the plan, the code is
    executed, and ``test`` asks the model to judge the outcome against the
    plan's validation criteria. Every raw model reply is appended to
    ``self.history`` as a ``(role, text)`` tuple.
    """

    def __init__(self, llm=None):
        """Create the agent.

        Args:
            llm: Object exposing ``generate(prompt, system_prompt,
                temperature=...)`` that returns a string. When falsy, a
                default ``LocalLLM()`` is constructed.
        """
        self.llm = llm or LocalLLM()
        self.history = []

    def _extract_json(self, text):
        """Best-effort extraction of a JSON object from model output.

        Tries the whole text first, then the span from the first ``{`` to the
        last ``}``. Only JSON *objects* are accepted, because every caller
        goes on to use ``.get`` on the result.

        Args:
            text: Raw model reply.

        Returns:
            The parsed ``dict``, or ``None`` when no JSON object is found.
        """
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed

        start, end = text.find('{'), text.rfind('}') + 1
        if start >= 0 and end > start:
            try:
                parsed = json.loads(text[start:end])
            except json.JSONDecodeError:
                return None
            if isinstance(parsed, dict):
                return parsed
        return None

    def plan(self, task, data_info):
        """Phase 1: ask the model for an execution plan and print a summary.

        Args:
            task: Natural-language description of the task.
            data_info: Text describing the available data; interpolated into
                the prompt as-is.

        Returns:
            The plan ``dict``. When the reply contains no parsable JSON
            object, a fallback plan whose single step is ``task`` is used.
        """
        print("\n" + "="*60)
        print("PHASE 1: PLANNING")
        print("="*60)
        prompt = f"Task: {task}\n\nData Information:\n{data_info}\n\nCreate an execution plan as JSON with steps, expected_output, and validation_criteria."
        plan_text = self.llm.generate(prompt, PLANNER_PROMPT, temperature=0.2)
        self.history.append(("PLANNER", plan_text))
        plan = self._extract_json(plan_text) or {"steps":[task],"expected_output":"Processed data","validation_criteria":["Result generated","No errors"]}
        print(f"\n📋 Plan Created:")
        print(f" Steps: {len(plan.get('steps', []))}")
        for i, step in enumerate(plan.get('steps', []), 1):
            print(f" {i}. {step}")
        print(f" Expected: {plan.get('expected_output', 'N/A')}")
        return plan

    def execute(self, plan, data_context):
        """Phase 2: ask the model for Python code implementing the plan.

        The code is only generated, cleaned and printed here; ``run`` is what
        actually executes it.

        Args:
            plan: Plan ``dict``; its ``steps`` list is rendered into the prompt.
            data_context: Text describing the DataFrame, interpolated as-is.

        Returns:
            The cleaned source code as a single string.
        """
        print("\n" + "="*60)
        print("PHASE 2: EXECUTION")
        print("="*60)
        steps_text = '\n'.join(f"{i}. {s}" for i, s in enumerate(plan.get('steps', []), 1))
        prompt = f"Task Steps:\n{steps_text}\n\nData available: DataFrame 'df'\n{data_context}\n\nWrite Python code to execute these steps. Store final result in 'result' variable."
        code = self.llm.generate(prompt, EXECUTOR_PROMPT, temperature=0.1)
        self.history.append(("EXECUTOR", code))

        # Unwrap a markdown code fence if the model added one despite the prompt.
        if "```python" in code:
            code = code.split("```python")[1].split("```")[0]
        elif "```" in code:
            code = code.split("```")[1].split("```")[0]

        # Drop blank lines and comment lines; a comment line that mentions
        # 'import' is kept, exactly as the original filter did.
        lines = []
        for line in code.split('\n'):
            s = line.strip()
            if s and (not s.startswith('#') or 'import' in s):
                lines.append(line)
        code = '\n'.join(lines).strip()

        # Split once up front: a backslash inside an f-string replacement
        # field is a SyntaxError before Python 3.12.
        code_lines = code.split('\n')
        print(f"\n💻 Generated Code:\n" + "-"*60)
        for i, line in enumerate(code_lines[:15], 1):
            print(f"{i:2}. {line}")
        if len(code_lines) > 15:
            print(f" ... ({len(code_lines) - 15} more lines)")
        print("-"*60)
        return code

    def test(self, plan, result, execution_error=None):
        """Phase 3: ask the model whether the outcome satisfies the plan.

        Args:
            plan: Plan ``dict``; ``validation_criteria`` and
                ``expected_output`` are rendered into the prompt.
            result: Value produced by the executed code (may be ``None``).
            execution_error: Error message from execution, or ``None``.

        Returns:
            The verdict ``dict``. When the reply contains no parsable JSON
            object, a fallback verdict is used whose ``passed`` flag is true
            only if there was no execution error.
        """
        print("\n" + "="*60)
        print("PHASE 3: TESTING & VERIFICATION")
        print("="*60)
        result_desc = f"EXECUTION ERROR: {execution_error}" if execution_error else f"Result type: {type(result).__name__}\n"
        if not execution_error:
            if isinstance(result, pd.DataFrame):
                result_desc += f"Shape: {result.shape}\nColumns: {list(result.columns)}\nSample:\n{result.head(3).to_string()}"
            elif isinstance(result, (int,float,str)):
                result_desc += f"Value: {result}"
            else:
                # Anything else is summarised by a truncated string form.
                result_desc += f"Value: {str(result)[:200]}"
        criteria_text = '\n'.join(f"- {c}" for c in plan.get('validation_criteria', []))
        prompt = f"Validation Criteria:\n{criteria_text}\n\nExpected: {plan.get('expected_output', 'N/A')}\n\nActual Result:\n{result_desc}\n\nEvaluate if result meets criteria. Return JSON with passed (true/false), issues, and recommendations."
        test_result = self.llm.generate(prompt, TESTER_PROMPT, temperature=0.2)
        self.history.append(("TESTER", test_result))
        test_json = self._extract_json(test_result) or {"passed":execution_error is None,"issues":["Could not parse test result"],"recommendations":["Review manually"]}
        print(f"\n✓ Test Results:\n Status: {'✅ PASSED' if test_json.get('passed') else '❌ FAILED'}")
        if test_json.get('issues'):
            print(" Issues:")
            for issue in test_json['issues'][:3]:
                print(f" • {issue}")
        if test_json.get('recommendations'):
            print(" Recommendations:")
            for rec in test_json['recommendations'][:3]:
                print(f" • {rec}")
        return test_json

    def run(self, task, df=None, data_info=None):
        """Run plan → execute → test for one task.

        Args:
            task: Natural-language description of the task.
            df: Optional DataFrame exposed to the generated code as ``df``.
            data_info: Optional text description of the data. When omitted
                and ``df`` is given, shape, columns and a two-row sample of
                ``df`` are used.

        Returns:
            A ``dict`` with keys ``plan``, ``code``, ``result``, ``test`` and
            ``history`` (the agent's own history list, not a copy).
        """
        print("\n🤖 SELF-VERIFYING DATA-OPS AGENT (Local HF Model)")
        print(f"Task: {task}\n")
        if data_info is None and df is not None:
            data_info = f"Shape: {df.shape}\nColumns: {list(df.columns)}\nSample:\n{df.head(2).to_string()}"
        plan = self.plan(task, data_info)
        code = self.execute(plan, data_info)
        result, error = None, None
        try:
            local_vars = {'pd': pd, 'np': np, 'df': df}
            # SECURITY: this executes model-generated code in-process with no
            # sandbox. Only use with trusted models, tasks and data.
            exec(code, local_vars)
            result = local_vars.get('result')
        except Exception as e:
            # Any failure of the generated code is reported to the tester
            # phase rather than aborting the run.
            error = str(e)
            print(f"\n⚠️ Execution Error: {error}")
        test_result = self.test(plan, result, error)
        return {'plan': plan,'code': code,'result': result,'test': test_result,'history': self.history}

Practical Applications

Use Case: “Sales data aggregation using automated planning and execution”
Pitfall: “Over-reliance on LLM-generated code without manual review can lead to uncaught errors”

References:

https://www.marktechpost.com/2025/11/13/how-to-build-a-fully-self-verifying-data-operations-ai-agent-using-local-hugging-face-models-for-automated-planning-execution-and-testing/

On This Page

How to Build a Fully Self-Verifying Data Operations AI Agent Using Local Hugging Face Models for Automated Planning, Execution, and Testing

How to Build a Fully Self-Verifying Data Operations AI Agent Using Local Hugging Face Models for Automated Planning, Execution, and Testing

Why This Matters

Key Insights

Working Example

Practical Applications

Continue reading

Related Content

How to Build a Fully Autonomous Local Fleet-Maintenance Analysis Agent Using SmolAgents and Qwen Model

Building Production-Ready Agentic Workflows with AgentScope and ReAct Agents

Designing a Multi-Tool Research Agent: Integrating Web Search, PDF Vision, and Automated Reporting