Upload files to "/"

2026-06-10 18:55:29 +00:00
--- a/main.py
+++ b/main.py
@@ -0,0 +1,306 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Interactive R&D Assistant: generates one research idea at a time (user decides when to stop),
+then suggests 3 search terms per idea, searches mithal.space,
+and saves everything in a markdown file.
+"""
+
+import sys
+import time
+import json
+import requests
+from pathlib import Path
+from datetime import datetime
+from urllib.parse import quote
+from dotenv import load_dotenv
+load_dotenv()
+import os
+
+# =============================================================================
+# 1. LLM configuration
+# =============================================================================
+# Bearer token for the chat API, read from the process environment.
+# NOTE(review): os.getenv returns None when AUTH_TOKEN is unset, in which case
+# the Authorization header below is literally "Bearer None" — confirm whether
+# a missing token should be rejected up front instead.
+AUTH_TOKEN = os.getenv("AUTH_TOKEN")  
+# Root of the chat REST API; the request helpers append endpoint paths to it.
+BASE_URL = "https://chat.cumin.dev/api"
+# Headers sent with every API request: bearer authentication and JSON bodies.
+HEADERS = {
+    "Authorization": f"Bearer {AUTH_TOKEN}",
+    "Content-Type": "application/json"
+}
+
+# Prompt asking the LLM for a SINGLE research idea, returned as one JSON object.
+# The four keys it requests (title, explanation, novelty, reference) are the
+# same keys parse_single_idea() requires before accepting a reply.
+SINGLE_IDEA_PROMPT = """You are an expert R&D assistant specialized in AI, Machine Learning, and LLMs.
+Generate exactly ONE creative, underexplored research direction.
+Respond ONLY with a valid JSON object with exactly these fields:
+- "title": short title in Arabic
+- "explanation": brief explanation (2-3 sentences) in Arabic
+- "novelty": why it's promising and novel evidence (1 sentence) in Arabic
+- "reference": at least one paper or source (as a short text) in Arabic
+
+Example:
+{
+  "title": "عنوان الفكرة",
+  "explanation": "شرح موجز للفكرة...",
+  "novelty": "لماذا هذه الفكرة جديدة وواعدة...",
+  "reference": "مثال: ورقة علمية من arXiv..."
+}
+Output ONLY the JSON object, no extra text."""
+
+def create_conversation(title="R&D Session", timeout=30):
+    """Create a new conversation on the chat API.
+
+    Args:
+        title: Title sent in the JSON body of the request.
+        timeout: Seconds to wait for the HTTP request. requests waits
+            indefinitely when no timeout is given, which would hang the script.
+
+    Returns:
+        The value of the "id" field of the JSON response, or None when the
+        request fails, the status is not 200/201, or the body is not a JSON
+        object. A diagnostic line is printed for each failure.
+    """
+    url = f"{BASE_URL}/conversations"
+    try:
+        resp = requests.post(url, headers=HEADERS, json={"title": title}, timeout=timeout)
+    except requests.RequestException as exc:
+        # Network problems are reported like any other failure instead of
+        # crashing the interactive session with a traceback.
+        print(f"Failed to create conversation: {exc}")
+        return None
+    if resp.status_code not in (200, 201):
+        print(f"Failed to create conversation: {resp.status_code} - {resp.text}")
+        return None
+    try:
+        payload = resp.json()
+    except ValueError:
+        # Every JSON decoding error raised by requests derives from ValueError.
+        print(f"Failed to create conversation: invalid JSON - {resp.text}")
+        return None
+    return payload.get("id") if isinstance(payload, dict) else None
+
+def send_message(conv_id, text, timeout=120):
+    """Post a user message to an existing conversation.
+
+    Args:
+        conv_id: Identifier of the conversation to post to.
+        text: Message text; it is sent with an empty "files" list.
+        timeout: Seconds to wait for the HTTP request, so a stalled server
+            cannot block the script forever.
+
+    Returns:
+        True only when the API answers with HTTP 200; False for any other
+        status or when the request itself fails.
+    """
+    url = f"{BASE_URL}/conversations/{conv_id}/chat"
+    try:
+        resp = requests.post(
+            url,
+            headers=HEADERS,
+            json={"message": text, "files": []},
+            timeout=timeout,
+        )
+    except requests.RequestException as exc:
+        print(f"Failed to send message: {exc}")
+        return False
+    return resp.status_code == 200
+
+def get_conversation(conv_id, timeout=30):
+    """Fetch a conversation from the chat API.
+
+    Args:
+        conv_id: Identifier of the conversation to fetch.
+        timeout: Seconds to wait for the HTTP request.
+
+    Returns:
+        The decoded JSON body when the API answers with HTTP 200, otherwise
+        None. Network errors and undecodable bodies also yield None.
+    """
+    url = f"{BASE_URL}/conversations/{conv_id}"
+    try:
+        resp = requests.get(url, headers=HEADERS, timeout=timeout)
+        if resp.status_code == 200:
+            return resp.json()
+    except (requests.RequestException, ValueError):
+        # This function is polled in a loop, so a transient network or
+        # decoding error is reported as "no data" and the caller may try again.
+        return None
+    return None
+
+def wait_for_assistant_reply(conv_id, last_msg_count, timeout=120, poll_interval=2):
+    """Poll a conversation until the assistant has replied or the timeout expires.
+
+    Args:
+        conv_id: Identifier of the conversation to poll.
+        last_msg_count: Number of messages already present; a reply is only
+            accepted once the conversation holds more messages than this.
+        timeout: Maximum number of seconds to keep polling.
+        poll_interval: Seconds to sleep between two polls.
+
+    Returns:
+        The "content" of the newest message once that message has the role
+        "assistant", or None when the timeout expires first.
+    """
+    # A monotonic clock keeps the deadline correct even if the system time
+    # is adjusted while we are waiting.
+    deadline = time.monotonic() + timeout
+    while True:
+        conv = get_conversation(conv_id)
+        messages = conv.get("messages") if isinstance(conv, dict) else None
+        if isinstance(messages, list) and len(messages) > last_msg_count:
+            newest = messages[-1]
+            if isinstance(newest, dict) and newest.get("role") == "assistant":
+                return newest.get("content")
+        remaining = deadline - time.monotonic()
+        if remaining <= 0:
+            return None
+        # Never sleep past the deadline.
+        time.sleep(min(poll_interval, remaining))
+
+def get_llm_response(prompt, timeout=120):
+    """Create a fresh conversation, send the prompt and wait for the reply.
+
+    Args:
+        prompt: Text sent as the single user message of the new conversation.
+        timeout: Maximum number of seconds to wait for the assistant's reply.
+
+    Returns:
+        The assistant's reply content, or None when the conversation cannot be
+        created or read, the message cannot be sent, or no reply arrives in time.
+    """
+    conv_id = create_conversation()
+    if not conv_id:
+        return None
+    # Snapshot the message count BEFORE sending. Counting after the send races
+    # with the reply: if the assistant has already answered, its message is
+    # part of the baseline and the wait below can never see a newer one.
+    conv_data = get_conversation(conv_id)
+    if not conv_data:
+        return None
+    msg_count = len(conv_data.get("messages") or [])
+    if not send_message(conv_id, prompt):
+        return None
+    print(f"  ⏳ Waiting for LLM response (up to {timeout}s)...")
+    return wait_for_assistant_reply(conv_id, msg_count, timeout=timeout)
+
+def parse_single_idea(raw_json_str):
+    """Parse the LLM's reply into an idea dict.
+
+    The reply should be a single JSON object, but models frequently wrap it in
+    a Markdown code fence (with any language tag) or add prose around it.
+    Therefore the text between the first "{" and the last "}" is decoded,
+    which covers fenced, prefixed and suffixed replies alike.
+
+    Args:
+        raw_json_str: Raw reply text from the LLM.
+
+    Returns:
+        The decoded dict when it contains the keys "title", "explanation",
+        "novelty" and "reference"; otherwise None (a diagnostic is printed).
+    """
+    required = ("title", "explanation", "novelty", "reference")
+    if not isinstance(raw_json_str, str):
+        print("  ❌ Failed to parse JSON from LLM.")
+        return None
+
+    start = raw_json_str.find("{")
+    end = raw_json_str.rfind("}")
+    if 0 <= start < end:
+        candidate = raw_json_str[start:end + 1]
+    else:
+        # No object delimiters at all: let json report the error below.
+        candidate = raw_json_str.strip()
+
+    try:
+        idea = json.loads(candidate)
+    except json.JSONDecodeError:
+        print("  ❌ Failed to parse JSON from LLM.")
+        print("  Raw response (first 500 chars):", raw_json_str[:500])
+        return None
+
+    if isinstance(idea, dict) and all(key in idea for key in required):
+        return idea
+    print("  ❌ LLM response missing required fields.")
+    return None
+
+# =============================================================================
+# 2. Search engine integration (no screenshots)
+# =============================================================================
+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
+
+# CSS selectors used by search_mithal() to read the results page.
+RESULT_LINK_SELECTOR = "a.result-link"  # anchors collected as the search results
+TITLE_SELECTOR = "h3"  # looked up inside each result anchor for its title text
+URL_SELECTOR = "small.arabic-url"  # looked up inside each anchor; its data-url attribute is read
+
+def _ask_after_search_failure(prompt):
+    """Ask how to proceed after a failed search; return 'r', 'a' or 's'.
+
+    Any answer other than 'r' (retry) or 'a' (abort) counts as 's' (skip).
+    """
+    choice = input(prompt).strip().lower()
+    return choice if choice in ("r", "a") else "s"
+
+
+def search_mithal(query, retry_user_choice=True):
+    """Search mithal.space and return the top results.
+
+    Args:
+        query: Search phrase; it is percent-encoded into the search URL.
+        retry_user_choice: When True, a failed attempt asks the user whether to
+            retry, skip this term or abort the script. When False, a failed
+            attempt returns an empty list immediately.
+
+    Returns:
+        A list of at most three (title, url) tuples; an empty list when the
+        search failed and was skipped. A result without a URL gets "#".
+
+    Raises:
+        SystemExit: When the user chooses to abort (exit status 0).
+    """
+    search_url = f"https://mithal.space/search?q={quote(query, safe='')}"
+    print(f"    🔍 Searching: {query}")
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=True)
+        # try/finally closes the browser on every exit path: normal return,
+        # skip, abort via sys.exit, and unexpected exceptions.
+        try:
+            context = browser.new_context(locale="ar-SA", viewport={"width": 1280, "height": 900})
+            page = context.new_page()
+
+            while True:
+                try:
+                    page.goto(search_url, wait_until="domcontentloaded", timeout=15000)
+                    page.wait_for_selector(RESULT_LINK_SELECTOR, timeout=10000)
+                    results = []
+                    for link in page.query_selector_all(RESULT_LINK_SELECTOR)[:3]:
+                        title_elem = link.query_selector(TITLE_SELECTOR)
+                        title = title_elem.inner_text() if title_elem else "(بدون عنوان)"
+                        url_elem = link.query_selector(URL_SELECTOR)
+                        url = url_elem.get_attribute("data-url") if url_elem else None
+                        if not url:
+                            # Fall back to the anchor's own target when the
+                            # dedicated URL element or its attribute is missing.
+                            url = link.get_attribute("href")
+                        results.append((title.strip(), url.strip() if url else "#"))
+                    return results
+                except PlaywrightTimeoutError:
+                    print(f"    ⚠️ Search timed out for: {query}")
+                    prompt = "      Retry (r), Skip this term (s), or Abort whole script (a)? "
+                except Exception as e:
+                    print(f"    ❌ Error searching: {e}")
+                    prompt = "      Retry (r), Skip (s), or Abort (a)? "
+
+                # Only reached after a failed attempt.
+                if not retry_user_choice:
+                    return []
+                choice = _ask_after_search_failure(prompt)
+                if choice == "a":
+                    sys.exit(0)
+                if choice == "s":
+                    return []
+                # choice == "r": loop around and try the same page again.
+        finally:
+            browser.close()
+# =============================================================================
+# 3. Search term generation for a given idea
+# =============================================================================
+def get_search_terms_for_idea(idea_title, idea_explanation, timeout=90):
+    """Ask the LLM for Arabic search queries related to an idea.
+
+    Args:
+        idea_title: Title of the idea, inserted into the prompt.
+        idea_explanation: Explanation of the idea, inserted into the prompt.
+        timeout: Maximum number of seconds to wait for the LLM's reply.
+
+    Returns:
+        A list of at most three non-blank query strings, or None when the LLM
+        gave no reply or the reply is not a JSON array of strings.
+    """
+    prompt = f"""Based on the following R&D idea:
+Title: {idea_title}
+Explanation: {idea_explanation}
+
+Generate exactly 3 Arabic search queries (short phrases) that would help find related research, papers, or projects online.
+Return ONLY a JSON array of strings, e.g. ["استعلام 1", "استعلام 2", "استعلام 3"].
+No extra text."""
+    reply = get_llm_response(prompt, timeout=timeout)
+    if not reply:
+        return None
+    try:
+        # Decode the text between the first "[" and the last "]" so that code
+        # fences or prose around the array do not break parsing.
+        start, end = reply.find("["), reply.rfind("]")
+        candidate = reply[start:end + 1] if 0 <= start < end else reply
+        terms = json.loads(candidate)
+    except (AttributeError, TypeError, ValueError):
+        # Only parsing problems are handled here; a bare except would also
+        # swallow KeyboardInterrupt and SystemExit.
+        print(f"    ⚠️ Could not parse search terms for idea: {idea_title}")
+        return None
+    if isinstance(terms, list) and all(isinstance(t, str) for t in terms):
+        # Blank entries would only produce empty searches, so drop them.
+        return [t.strip() for t in terms if t.strip()][:3]
+    print(f"    ⚠️ Invalid search terms format for idea: {idea_title}")
+    return None
+
+# =============================================================================
+# 4. Main interactive loop
+# =============================================================================
+def _append_markdown(out_file, text):
+    """Append text to the UTF-8 encoded report file."""
+    with out_file.open("a", encoding="utf-8") as f:
+        f.write(text)
+
+
+def main():
+    """Run the interactive session and write the markdown report.
+
+    Each round asks the LLM for one idea, appends it to a timestamped
+    rd_ideas_*.md file in the current directory, asks the LLM for search
+    terms, searches mithal.space for each term and appends the results.
+    The user decides after every round whether to continue.
+    """
+    print("="*70)
+    print("INTERACTIVE R&D ASSISTANT with integrated search")
+    print("Type 'stop' when asked for another idea, or press Ctrl+C to abort.")
+    print("="*70)
+
+    # Create output file with timestamp
+    now = datetime.now()
+    out_file = Path(f"rd_ideas_{now.strftime('%Y%m%d_%H%M%S')}.md")
+    out_file.write_text(f"# R&D Ideas Report\n*Generated on {now.strftime('%Y-%m-%d %H:%M:%S')}*\n\n", encoding="utf-8")
+
+    # Counts ideas actually written, so failed attempts do not leave gaps in
+    # the numbering of the report.
+    idea_counter = 0
+
+    while True:
+        idea_number = idea_counter + 1
+        print(f"\n📡 Generating idea #{idea_number}...")
+        raw_idea = get_llm_response(SINGLE_IDEA_PROMPT, timeout=120)
+        idea = parse_single_idea(raw_idea) if raw_idea else None
+        if not idea:
+            if raw_idea:
+                print("❌ Could not parse idea. Skipping this one.")
+            else:
+                print("❌ Failed to get idea from LLM. Do you want to try again?")
+            # Always ask before retrying: an LLM that keeps returning unusable
+            # output must not trigger an endless stream of API calls.
+            retry = input("Try again? (y/n): ").strip().lower()
+            if retry == 'y':
+                continue
+            break
+        idea_counter = idea_number
+
+        title = idea.get("title", "بدون عنوان")
+        explanation = idea.get("explanation", "")
+        novelty = idea.get("novelty", "")
+        reference = idea.get("reference", "")
+
+        # Write idea to markdown file
+        _append_markdown(
+            out_file,
+            f"## فكرة {idea_counter}: {title}\n\n"
+            f"**الشرح:** {explanation}\n\n"
+            f"**الجدة:** {novelty}\n\n"
+            f"**مرجع:** {reference}\n\n",
+        )
+
+        print(f"\n✅ Idea {idea_counter}: {title}")
+        print("   Generating 3 search terms (LLM)...")
+
+        terms = get_search_terms_for_idea(title, explanation, timeout=90)
+        if not terms:
+            _append_markdown(out_file, "**❌ فشل في توليد مصطلحات البحث**\n\n---\n\n")
+            print("   ⚠️ No search terms generated. Moving to next idea decision.\n")
+        else:
+            _append_markdown(out_file, "### مصطلحات البحث والنتائج\n\n")
+
+            for term_idx, term in enumerate(terms, 1):
+                print(f"    Term {term_idx}: {term}")
+                _append_markdown(out_file, f"#### مصطلح {term_idx}: `{term}`\n\n")
+
+                results = search_mithal(term, retry_user_choice=True)
+                if not results:
+                    _append_markdown(out_file, "*لا توجد نتائج*  \n\n")
+                else:
+                    lines = [
+                        f"{rank}. [{res_title}]({res_url})  \n"
+                        for rank, (res_title, res_url) in enumerate(results, 1)
+                    ]
+                    _append_markdown(out_file, "".join(lines) + "\n")
+
+            _append_markdown(out_file, "---\n\n")
+
+        # Ask if user wants another idea. 'stop' is accepted as well because
+        # the banner above tells the user to type it.
+        while True:
+            again = input("\n🔁 Do you want another research idea? (y/n): ").strip().lower()
+            if again in ('y', 'n', 'stop'):
+                break
+            print("Please answer 'y', 'n' or 'stop'.")
+        if again != 'y':
+            break
+
+    print(f"\n🎉 Finished! Results saved to: {out_file.resolve()}")
+
+if __name__ == "__main__":
+    # Script entry point: run the interactive session.
+    try:
+        main()
+    except KeyboardInterrupt:
+        # Ctrl+C is an advertised way to stop (see the banner printed by
+        # main), so print a short notice and exit with status 0 instead of
+        # showing a traceback.
+        print("\n\n⚠️ User interrupted. Exiting gracefully.")
+        sys.exit(0)