Upload files to "/"

هذا الالتزام موجود في:
2026-06-10 18:55:29 +00:00
الأصل e87a02a460
التزام 3f4c43a9b7
3 ملفات معدلة مع 308 إضافات و0 حذوفات

306
main.py Normal file
عرض الملف

@@ -0,0 +1,306 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Interactive R&D Assistant: generates one research idea at a time (user decides when to stop),
then suggests 3 search terms per idea, searches mithal.space,
and saves everything in a markdown file.
"""
import sys
import time
import json
import requests
from pathlib import Path
from datetime import datetime
from urllib.parse import quote
from dotenv import load_dotenv
load_dotenv()
import os
# =============================================================================
# 1. LLM configuration
# =============================================================================
# Bearer token for the chat API, read from the process environment
# (load_dotenv() has already been called above).
# NOTE(review): when AUTH_TOKEN is unset this is None and the Authorization
# header below becomes the literal string "Bearer None" — consider failing fast.
AUTH_TOKEN = os.getenv("AUTH_TOKEN")
# Root of the chat REST API; the request helpers append their paths to it.
BASE_URL = "https://chat.cumin.dev/api"
# Headers sent with every chat API request made by this script.
HEADERS = {
"Authorization": f"Bearer {AUTH_TOKEN}",
"Content-Type": "application/json"
}
# Prompt for a SINGLE research idea (JSON format)
# The four field names requested here are the ones parse_single_idea() requires.
SINGLE_IDEA_PROMPT = """You are an expert R&D assistant specialized in AI, Machine Learning, and LLMs.
Generate exactly ONE creative, underexplored research direction.
Respond ONLY with a valid JSON object with exactly these fields:
- "title": short title in Arabic
- "explanation": brief explanation (2-3 sentences) in Arabic
- "novelty": why it's promising and novel evidence (1 sentence) in Arabic
- "reference": at least one paper or source (as a short text) in Arabic
Example:
{
"title": "عنوان الفكرة",
"explanation": "شرح موجز للفكرة...",
"novelty": "لماذا هذه الفكرة جديدة وواعدة...",
"reference": "مثال: ورقة علمية من arXiv..."
}
Output ONLY the JSON object, no extra text."""
def create_conversation(title="R&D Session", request_timeout=30):
    """Create a new conversation on the chat backend and return its id.

    Args:
        title: Title sent in the JSON body of the create request.
        request_timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``id`` field of the JSON response, or ``None`` when the request
        fails, the status code is not 200/201, or the body is not a JSON object.
    """
    url = f"{BASE_URL}/conversations"
    try:
        # requests waits indefinitely unless a timeout is given explicitly.
        resp = requests.post(
            url,
            headers=HEADERS,
            json={"title": title},
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        print(f"Failed to create conversation: {exc}")
        return None

    if resp.status_code not in (200, 201):
        print(f"Failed to create conversation: {resp.status_code} - {resp.text}")
        return None

    try:
        return resp.json().get("id")
    except (ValueError, AttributeError):
        # ValueError: body is not JSON; AttributeError: JSON is not an object.
        print("Failed to create conversation: response was not a JSON object")
        return None
def send_message(conv_id, text, request_timeout=120):
    """Post a user message to an existing conversation.

    Args:
        conv_id: Conversation id used to build the request URL.
        text: Message text sent as the ``message`` field.
        request_timeout: Seconds to wait for the HTTP request. The default is
            generous because it is not visible from this file whether the
            endpoint answers immediately or only after the reply is generated.

    Returns:
        ``True`` when the server answers with HTTP 200, ``False`` for any other
        status or when the request itself fails.
    """
    url = f"{BASE_URL}/conversations/{conv_id}/chat"
    payload = {"message": text, "files": []}
    try:
        resp = requests.post(url, headers=HEADERS, json=payload, timeout=request_timeout)
    except requests.RequestException as exc:
        print(f"Failed to send message: {exc}")
        return False
    return resp.status_code == 200
def get_conversation(conv_id, request_timeout=30):
    """Fetch a conversation from the chat backend.

    Args:
        conv_id: Conversation id used to build the request URL.
        request_timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (non-200 status,
        network failure, or a body that is not valid JSON).
    """
    url = f"{BASE_URL}/conversations/{conv_id}"
    try:
        resp = requests.get(url, headers=HEADERS, timeout=request_timeout)
    except requests.RequestException:
        # Callers poll this function, so a transient failure is reported as
        # "no data" rather than aborting the whole script.
        return None
    if resp.status_code != 200:
        return None
    try:
        return resp.json()
    except ValueError:
        return None
def wait_for_assistant_reply(conv_id, last_msg_count, timeout=120, poll_interval=2):
    """Poll a conversation until the assistant's reply is its newest message.

    Args:
        conv_id: Conversation id passed to ``get_conversation``.
        last_msg_count: Number of messages already present; a reply is only
            accepted once the conversation holds more messages than this.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls.

    Returns:
        The ``content`` of the last message once its ``role`` is
        ``"assistant"``, or ``None`` if the deadline passes first.
    """
    # A monotonic clock keeps the deadline correct if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    while True:
        if deadline - time.monotonic() <= 0:
            return None

        conv = get_conversation(conv_id)
        messages = conv.get("messages") if isinstance(conv, dict) else None
        if messages and len(messages) > last_msg_count:
            last_msg = messages[-1]
            if isinstance(last_msg, dict) and last_msg.get("role") == "assistant":
                return last_msg.get("content")

        # Never sleep beyond the deadline, so the timeout is actually honoured.
        remaining = deadline - time.monotonic()
        time.sleep(max(0, min(poll_interval, remaining)))
def get_llm_response(prompt, timeout=120):
    """Send *prompt* in a fresh conversation and return the assistant's reply.

    Args:
        prompt: Text sent as the single user message.
        timeout: Seconds to wait for the assistant's reply.

    Returns:
        The reply content, or ``None`` if any step fails (conversation
        creation, fetching the conversation, sending, or waiting for the reply).
    """
    conv_id = create_conversation()
    if not conv_id:
        return None

    # Take the baseline message count BEFORE sending. If it were read after
    # sending, a reply that had already arrived would be counted in the
    # baseline and the poll below would time out.
    conv_data = get_conversation(conv_id)
    if not conv_data:
        return None
    msg_count = len(conv_data.get("messages") or [])

    if not send_message(conv_id, prompt):
        return None

    print(f" ⏳ Waiting for LLM response (up to {timeout}s)...")
    return wait_for_assistant_reply(conv_id, msg_count, timeout=timeout)
def parse_single_idea(raw_json_str):
    """Parse an LLM reply into an idea dict.

    Markdown code fences around the JSON are removed. If the remaining text is
    not valid JSON on its own, the span from the first ``{`` to the last ``}``
    is tried as well, so a reply with prose around the object still parses.

    Args:
        raw_json_str: Raw reply text from the LLM.

    Returns:
        The parsed dict when it contains ``title``, ``explanation``,
        ``novelty`` and ``reference``; otherwise ``None``.
    """
    required_fields = ("title", "explanation", "novelty", "reference")

    if not isinstance(raw_json_str, str):
        print(" ❌ Failed to parse JSON from LLM.")
        return None

    clean = raw_json_str.strip()
    if clean.startswith("```json"):
        clean = clean[7:]
    elif clean.startswith("```"):
        clean = clean[3:]
    if clean.endswith("```"):
        clean = clean[:-3]

    candidates = [clean]
    start, end = clean.find("{"), clean.rfind("}")
    if 0 <= start < end:
        # Fallback for replies like "Here is the idea: {...} Hope it helps!".
        candidates.append(clean[start:end + 1])

    for candidate in candidates:
        try:
            idea = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(idea, dict) and all(k in idea for k in required_fields):
            return idea
        print(" ❌ LLM response missing required fields.")
        return None

    print(" ❌ Failed to parse JSON from LLM.")
    print(" Raw response (first 500 chars):", raw_json_str[:500])
    return None
# =============================================================================
# 2. Search engine integration (no screenshots)
# =============================================================================
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
# CSS selectors used by search_mithal() to scrape the results page.
# Matches one search result; the page is considered loaded once it appears.
RESULT_LINK_SELECTOR = "a.result-link"
# Looked up inside each result element; its inner text becomes the result title.
TITLE_SELECTOR = "h3"
# Looked up inside each result element; its data-url attribute is preferred
# over the result element's own href.
URL_SELECTOR = "small.arabic-url"
def search_mithal(query, retry_user_choice=True):
    """Search mithal.space for *query* and return the top three results.

    Args:
        query: Search phrase; it is percent-encoded into the search URL.
        retry_user_choice: When true, a failed search asks the user whether to
            retry, skip this term, or abort the script. When false, a failed
            search returns an empty list immediately.

    Returns:
        A list of up to three ``(title, url)`` tuples, or ``[]`` when the
        search failed and was skipped.

    Raises:
        SystemExit: If the user chooses to abort.
    """
    search_url = f"https://mithal.space/search?q={quote(query, safe='')}"
    print(f" 🔍 Searching: {query}")
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(locale="ar-SA", viewport={"width": 1280, "height": 900})
            page = context.new_page()
            while True:
                try:
                    page.goto(search_url, wait_until="domcontentloaded", timeout=15000)
                    page.wait_for_selector(RESULT_LINK_SELECTOR, timeout=10000)
                    results = []
                    for link in page.query_selector_all(RESULT_LINK_SELECTOR)[:3]:
                        title_elem = link.query_selector(TITLE_SELECTOR)
                        title = title_elem.inner_text() if title_elem else "(بدون عنوان)"
                        url_elem = link.query_selector(URL_SELECTOR)
                        url = url_elem.get_attribute("data-url") if url_elem else link.get_attribute("href")
                        results.append((title.strip(), url.strip() if url else "#"))
                    return results
                except PlaywrightTimeoutError:
                    print(f" ⚠️ Search timed out for: {query}")
                    question = " Retry (r), Skip this term (s), or Abort whole script (a)? "
                except Exception as e:
                    print(f" ❌ Error searching: {e}")
                    question = " Retry (r), Skip (s), or Abort (a)? "

                # Both failure kinds share one decision path so they cannot
                # drift apart (e.g. one of them retrying forever).
                if not retry_user_choice:
                    return []
                choice = input(question).strip().lower()
                if choice == 'r':
                    continue
                if choice == 'a':
                    sys.exit(0)
                # 's' and any unrecognised answer both skip this term.
                return []
        finally:
            # Single cleanup point: runs on return, on abort, and on errors.
            browser.close()
# =============================================================================
# 3. Search term generation for a given idea
# =============================================================================
def get_search_terms_for_idea(idea_title, idea_explanation, timeout=90):
    """Ask the LLM for three Arabic search queries related to an idea.

    Args:
        idea_title: Title of the idea, embedded in the prompt.
        idea_explanation: Explanation of the idea, embedded in the prompt.
        timeout: Seconds to wait for the LLM reply.

    Returns:
        A list of at most three strings, or ``None`` when the LLM gives no
        reply or the reply is not a JSON array of strings.
    """
    prompt = f"""Based on the following R&D idea:
Title: {idea_title}
Explanation: {idea_explanation}
Generate exactly 3 Arabic search queries (short phrases) that would help find related research, papers, or projects online.
Return ONLY a JSON array of strings, e.g. ["استعلام 1", "استعلام 2", "استعلام 3"].
No extra text."""
    reply = get_llm_response(prompt, timeout=timeout)
    if not reply:
        return None
    if not isinstance(reply, str):
        print(f" ⚠️ Could not parse search terms for idea: {idea_title}")
        return None

    # Strip an optional Markdown code fence around the JSON array.
    clean = reply.strip()
    if clean.startswith("```json"):
        clean = clean[7:]
    elif clean.startswith("```"):
        clean = clean[3:]
    if clean.endswith("```"):
        clean = clean[:-3]

    try:
        terms = json.loads(clean)
    except json.JSONDecodeError:
        # Only parse errors are handled here, so Ctrl+C and sys.exit() still
        # propagate (a bare ``except:`` would swallow them).
        print(f" ⚠️ Could not parse search terms for idea: {idea_title}")
        return None

    if isinstance(terms, list) and all(isinstance(t, str) for t in terms):
        return terms[:3]
    print(f" ⚠️ Invalid search terms format for idea: {idea_title}")
    return None
# =============================================================================
# 4. Main interactive loop
# =============================================================================
def _append_markdown(out_file, text):
    """Append *text* to the report file, encoded as UTF-8."""
    with out_file.open("a", encoding="utf-8") as f:
        f.write(text)


def main():
    """Run the interactive session: generate ideas, search, and write a report.

    Each loop iteration asks the LLM for one idea, appends it to a timestamped
    Markdown file in the current directory, asks the LLM for search terms,
    searches each term, and then asks the user whether to continue. The report
    is appended piece by piece, so an interrupted run keeps what was already
    written.
    """
    print("="*70)
    print("INTERACTIVE R&D ASSISTANT with integrated search")
    print("Type 'stop' when asked for another idea, or press Ctrl+C to abort.")
    print("="*70)
    # Create output file with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_file = Path(f"rd_ideas_{timestamp}.md")
    out_file.write_text(f"# R&D Ideas Report\n*Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n", encoding="utf-8")

    # Counts ideas actually written to the report. Failed or unparsable
    # attempts do not consume a number, so the report numbering has no gaps.
    idea_counter = 0
    while True:
        print(f"\n📡 Generating idea #{idea_counter + 1}...")
        raw_idea = get_llm_response(SINGLE_IDEA_PROMPT, timeout=120)
        if not raw_idea:
            print("❌ Failed to get idea from LLM. Do you want to try again?")
            retry = input("Try again? (y/n): ").strip().lower()
            if retry == 'y':
                continue
            break

        idea = parse_single_idea(raw_idea)
        if not idea:
            print("❌ Could not parse idea. Skipping this one.")
            continue

        idea_counter += 1
        title = idea.get("title", "بدون عنوان")
        explanation = idea.get("explanation", "")
        novelty = idea.get("novelty", "")
        reference = idea.get("reference", "")

        # Write idea to markdown file
        _append_markdown(
            out_file,
            f"## فكرة {idea_counter}: {title}\n\n"
            f"**الشرح:** {explanation}\n\n"
            f"**الجدة:** {novelty}\n\n"
            f"**مرجع:** {reference}\n\n",
        )
        print(f"\n✅ Idea {idea_counter}: {title}")
        print(" Generating 3 search terms (LLM)...")
        terms = get_search_terms_for_idea(title, explanation, timeout=90)
        if not terms:
            _append_markdown(out_file, "**❌ فشل في توليد مصطلحات البحث**\n\n---\n\n")
            print(" ⚠️ No search terms generated. Moving to next idea decision.\n")
        else:
            _append_markdown(out_file, "### مصطلحات البحث والنتائج\n\n")
            for term_idx, term in enumerate(terms, 1):
                print(f" Term {term_idx}: {term}")
                _append_markdown(out_file, f"#### مصطلح {term_idx}: `{term}`\n\n")
                results = search_mithal(term, retry_user_choice=True)
                if not results:
                    _append_markdown(out_file, "*لا توجد نتائج* \n\n")
                else:
                    listing = "".join(
                        f"{rank}. [{res_title}]({res_url}) \n"
                        for rank, (res_title, res_url) in enumerate(results, 1)
                    )
                    _append_markdown(out_file, listing + "\n")
            _append_markdown(out_file, "---\n\n")

        # Ask if user wants another idea
        while True:
            again = input("\n🔁 Do you want another research idea? (y/n): ").strip().lower()
            # The banner tells the user to type 'stop', so accept it as "no".
            if again in ('y', 'n', 'stop'):
                break
            print("Please answer 'y' or 'n'.")
        if again != 'y':
            break

    print(f"\n🎉 Finished! Results saved to: {out_file.resolve()}")
# Script entry point: run the interactive session and treat Ctrl+C as a
# normal, successful way to end it.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n⚠️ User interrupted. Exiting gracefully.")
        raise SystemExit(0)