Close Menu
    Facebook X (Twitter) Instagram
    • Privacy Policy
    • Terms Of Service
    • Social Media Disclaimer
    • DMCA Compliance
    • Anti-Spam Policy
    Facebook X (Twitter) Instagram
    Deep Tech Ledger
    • Home
    • Crypto News
      • Bitcoin
      • Ethereum
      • Altcoins
      • Blockchain
      • DeFi
    • AI News
    • Stock News
    • Learn
      • AI for Beginners
      • AI Tips
      • Make Money with AI
    • Reviews
    • Tools
      • Best AI Tools
      • Crypto Market Cap List
      • Stock Market Overview
      • Market Heatmap
    • Contact
    Deep Tech Ledger
    Home»AI News»A Coding Implementation on Qwen 3.6-35B-A3B Covering Multimodal Inference, Thinking Control, Tool Calling, MoE Routing, RAG, and Session Persistence
    A Coding Implementation on Qwen 3.6-35B-A3B Covering Multimodal Inference, Thinking Control, Tool Calling, MoE Routing, RAG, and Session Persistence
    AI News

    A Coding Implementation on Qwen 3.6-35B-A3B Covering Multimodal Inference, Thinking Control, Tool Calling, MoE Routing, RAG, and Session Persistence

    April 21, 20263 Mins Read
    Share
    Facebook Twitter LinkedIn Pinterest Email
    ledger


    class QwenChat:
    def __init__(self, model, processor, system=None, tools=None):
    self.model, self.processor = model, processor
    self.tokenizer = processor.tokenizer
    self.history: list[dict] = []
    if system: self.history.append({“role”: “system”, “content”: system})
    self.tools = tools

    def user(self, content): self.history.append({“role”:”user”,”content”:content}); return self
    def assistant(self, content, reasoning=””):
    m = {“role”:”assistant”,”content”:content}
    if reasoning: m[“reasoning_content”] = reasoning
    self.history.append(m); return self
    def tool_result(self, name, result):
    self.history.append({“role”:”tool”,”name”:name,
    “content”: result if isinstance(result, str) else json.dumps(result)})
    return self

    def _inputs(self, enable_thinking, preserve_thinking):
    return self.processor.apply_chat_template(
    self.history, tools=self.tools, tokenize=True,
    add_generation_prompt=True, return_dict=True, return_tensors=”pt”,
    enable_thinking=enable_thinking, preserve_thinking=preserve_thinking,
    ).to(self.model.device)

    def generate(self, *, enable_thinking=True, preserve_thinking=False,
    max_new_tokens=2048, preset=”thinking_general”,
    stopping_criteria=None, append_to_history=True):
    inp = self._inputs(enable_thinking, preserve_thinking)
    cfg = SAMPLING[preset]
    gk = dict(**inp, max_new_tokens=max_new_tokens, do_sample=True,
    temperature=cfg[“temperature”], top_p=cfg[“top_p”], top_k=cfg[“top_k”],
    repetition_penalty=1.0,
    pad_token_id=self.tokenizer.pad_token_id or self.tokenizer.eos_token_id)
    if stopping_criteria is not None: gk[“stopping_criteria”] = stopping_criteria
    with torch.inference_mode(): out = self.model.generate(**gk)
    raw = self.tokenizer.decode(out[0, inp[“input_ids”].shape[-1]:], skip_special_tokens=True)
    think, ans = split_thinking(raw)
    if append_to_history: self.assistant(ans, reasoning=think)
    return think, ans

    binance

    def stream(self, *, enable_thinking=True, preserve_thinking=False,
    max_new_tokens=2048, preset=”thinking_general”,
    on_thinking=None, on_answer=None):
    inp = self._inputs(enable_thinking, preserve_thinking)
    cfg = SAMPLING[preset]
    streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
    gk = dict(**inp, streamer=streamer, max_new_tokens=max_new_tokens, do_sample=True,
    temperature=cfg[“temperature”], top_p=cfg[“top_p”], top_k=cfg[“top_k”],
    pad_token_id=self.tokenizer.pad_token_id or self.tokenizer.eos_token_id)
    t = threading.Thread(target=self.model.generate, kwargs=gk); t.start()
    buf, in_think = “”, enable_thinking
    think_text, answer_text = “”, “”
    for piece in streamer:
    buf += piece
    if in_think:
    if THINK_CLOSE in buf:
    close_at = buf.index(THINK_CLOSE)
    resid = buf[:close_at]
    if on_thinking: on_thinking(resid[len(think_text):])
    think_text = resid
    buf = buf[close_at + len(THINK_CLOSE):]
    in_think = False
    if buf and on_answer: on_answer(buf)
    answer_text = buf; buf = “”
    else:
    if on_thinking: on_thinking(piece)
    think_text += piece
    else:
    if on_answer: on_answer(piece)
    answer_text += piece
    t.join()
    self.assistant(answer_text.strip(), reasoning=think_text.strip())
    return think_text.strip(), answer_text.strip()

    def save(self, path):
    with open(path, “w”) as f:
    json.dump({“history”: self.history, “tools”: self.tools}, f, indent=2)
    @classmethod
    def load(cls, model, processor, path):
    with open(path) as f: data = json.load(f)
    c = cls(model, processor, tools=data.get(“tools”))
    c.history = data[“history”]; return c

    class ThinkingBudget(StoppingCriteria):
    def __init__(self, tokenizer, budget: int):
    self.budget = budget
    self.open_ids = tokenizer.encode(THINK_OPEN, add_special_tokens=False)
    self.close_ids = tokenizer.encode(THINK_CLOSE, add_special_tokens=False)
    self.start = None
    def _find(self, seq, needle):
    n = len(needle)
    for i in range(len(seq)-n+1):
    if seq[i:i+n] == needle: return i
    return None
    def __call__(self, input_ids, scores, **kwargs):
    seq = input_ids[0].tolist()
    if self.start is None:
    idx = self._find(seq, self.open_ids)
    if idx is not None: self.start = idx + len(self.open_ids)
    return False
    if self._find(seq[self.start:], self.close_ids) is not None: return False
    return (len(seq) – self.start) >= self.budget

    TOOL_CALL_RE = re.compile(r”<tool_call>\s*(\{.*?\})\s*</tool_call>”, re.S)

    def run_calculate(expr: str) -> str:
    if any(c not in “0123456789+-*/().% ” for c in expr):
    return json.dumps({“error”:”illegal chars”})
    try: return json.dumps({“result”: eval(expr, {“__builtins__”: {}}, {})})
    except Exception as e: return json.dumps({“error”: str(e)})

    _DOCS = {
    “qwen3.6”: “Qwen3.6-35B-A3B is a 35B MoE with 3B active params and 262k native context.”,
    “deltanet”: “Gated DeltaNet is a linear-attention variant used in Qwen3.6’s hybrid layers.”,
    “moe”: “Qwen3.6 uses 256 experts with 8 routed + 1 shared per token.”,
    }
    def run_search_docs(q):
    hits = [v for k,v in _DOCS.items() if k in q.lower()]
    return json.dumps({“results”: hits or [“no hits”]})
    def run_get_time():
    import datetime as dt
    return json.dumps({“iso”: dt.datetime.utcnow().isoformat()+”Z”})

    TOOL_FNS = {
    “calculate”: lambda a: run_calculate(a[“expression”]),
    “search_docs”: lambda a: run_search_docs(a[“query”]),
    “get_time”: lambda a: run_get_time(),
    }
    TOOLS_SCHEMA = [
    {“type”:”function”,”function”:{“name”:”calculate”,”description”:”Evaluate arithmetic.”,
    “parameters”:{“type”:”object”,”properties”:{“expression”:{“type”:”string”}},”required”:[“expression”]}}},
    {“type”:”function”,”function”:{“name”:”search_docs”,”description”:”Search internal docs.”,
    “parameters”:{“type”:”object”,”properties”:{“query”:{“type”:”string”}},”required”:[“query”]}}},
    {“type”:”function”,”function”:{“name”:”get_time”,”description”:”Get current UTC time.”,
    “parameters”:{“type”:”object”,”properties”:{}}}},
    ]



    Source link

    aistudios
    Share. Facebook Twitter Pinterest LinkedIn Tumblr Email
    CryptoExpert
    • Website

    I’m someone who’s deeply curious about crypto and artificial intelligence. I created this site to share what I’m learning, break down complex ideas, and keep people updated on what’s happening in crypto and AI—without the unnecessary hype.

    Related Posts

    AI gave China a god’s-eye view of its energy grid. No one else has this mapping.

    May 25, 2026

    Microsoft Research Releases Webwright: A Terminal-Native Web Agent Framework That Scores 60.1% on Odysseys, Up from Base GPT-5.4’s 33.5%

    May 24, 2026

    Technology usually creates jobs for young, skilled workers. Will AI do the same? | MIT News

    May 23, 2026

    D&B's database of 642 million businesses was built for humans, not AI agents. So they rebuilt it.

    May 22, 2026
    Add A Comment
    Leave A Reply Cancel Reply

    aistudios
    Latest Posts

    Kelp DAO Says rsETH Fully Restored 5 Weeks After Hack

    May 26, 2026

    Tether’s Georgia stablecoin plan moves early on national payment rails

    May 26, 2026

    Ethereum Pushes Privacy Forward: EIP-8182 Eyes Hegota Upgrade Integration

    May 26, 2026

    Bitcoin Eyes $80K Rally on Middle East Peace Hopes: Analyst

    May 25, 2026

    Sugar Prices Slip on Stronger Sugar Exports from Thailand

    May 25, 2026
    aistudios
    LEGAL INFORMATION
    • Privacy Policy
    • Terms Of Service
    • Social Media Disclaimer
    • DMCA Compliance
    • Anti-Spam Policy
    Top Insights

    Bitcoin Slides Below $77K as US Military Strikes on Iran Shake Risk Appetite

    May 26, 2026

    CFTC may gain broader crypto oversight as staff who questioned major firms were reportedly sidelined

    May 26, 2026
    binance
    Facebook X (Twitter) Instagram Pinterest
    © 2026 DeepTechLedger.com - All rights reserved.

    Type above and press Enter to search. Press Esc to cancel.