#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Interactive R&D Assistant: generates one research idea at a time (user decides when to stop),
then suggests 3 search terms per idea, searches mithal.space,
and saves everything in a markdown file.

Third-party requirements: requests, python-dotenv, playwright (see requirements.txt).
"""

import json
import os
import sys
import time
from datetime import datetime
from pathlib import Path
from urllib.parse import quote

import requests
from dotenv import load_dotenv
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError

# AUTH_TOKEN is read from the environment; a local .env file (git-ignored, like
# venv) is loaded first so the os.getenv() call below can see it.
load_dotenv()

# =============================================================================
# 1. LLM configuration
# =============================================================================
AUTH_TOKEN = os.getenv("AUTH_TOKEN")
BASE_URL = "https://chat.cumin.dev/api"
HEADERS = {
    "Authorization": f"Bearer {AUTH_TOKEN}",
    "Content-Type": "application/json"
}
# (connect, read) timeouts in seconds applied to every HTTP call, so that a
# stalled server cannot block the script forever.
REQUEST_TIMEOUT = (10, 120)

# Prompt for a SINGLE research idea (JSON format)
SINGLE_IDEA_PROMPT = """You are an expert R&D assistant specialized in AI, Machine Learning, and LLMs.
Generate exactly ONE creative, underexplored research direction.
Respond ONLY with a valid JSON object with exactly these fields:
- "title": short title in Arabic
- "explanation": brief explanation (2-3 sentences) in Arabic
- "novelty": why it's promising and novel evidence (1 sentence) in Arabic
- "reference": at least one paper or source (as a short text) in Arabic

Example:
{
  "title": "عنوان الفكرة",
  "explanation": "شرح موجز للفكرة...",
  "novelty": "لماذا هذه الفكرة جديدة وواعدة...",
  "reference": "مثال: ورقة علمية من arXiv..."
}
Output ONLY the JSON object, no extra text."""

# Keys every parsed idea must contain (see SINGLE_IDEA_PROMPT).
_REQUIRED_IDEA_FIELDS = ("title", "explanation", "novelty", "reference")


def _json_body(resp):
    """Return the response body as a dict, or None if it is not a JSON object."""
    try:
        payload = resp.json()
    except ValueError:
        return None
    return payload if isinstance(payload, dict) else None


def create_conversation(title="R&D Session"):
    """Create a conversation on the chat API.

    Returns the new conversation id, or None when the request fails (network
    error, non-200/201 status, or a body without an "id").
    """
    url = f"{BASE_URL}/conversations"
    try:
        resp = requests.post(url, headers=HEADERS, json={"title": title}, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Failed to create conversation: {exc}")
        return None
    if resp.status_code not in (200, 201):
        print(f"Failed to create conversation: {resp.status_code} - {resp.text}")
        return None
    payload = _json_body(resp)
    return payload.get("id") if payload is not None else None


def send_message(conv_id, text):
    """Post *text* to the conversation; return True only on HTTP 200."""
    url = f"{BASE_URL}/conversations/{conv_id}/chat"
    try:
        resp = requests.post(
            url,
            headers=HEADERS,
            json={"message": text, "files": []},
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        print(f"Failed to send message: {exc}")
        return False
    return resp.status_code == 200


def get_conversation(conv_id):
    """Fetch the conversation as a dict, or None on any failure."""
    url = f"{BASE_URL}/conversations/{conv_id}"
    try:
        resp = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return None
    if resp.status_code != 200:
        return None
    return _json_body(resp)


def wait_for_assistant_reply(conv_id, last_msg_count, timeout=120, poll_interval=2):
    """Poll the conversation until an assistant message newer than the baseline appears.

    Args:
        conv_id: conversation to poll.
        last_msg_count: number of messages that existed before the prompt was sent.
        timeout: maximum number of seconds to keep polling.
        poll_interval: seconds to sleep between polls.

    Returns:
        The content of the last message once its role is "assistant",
        or None if the timeout elapses first.
    """
    # monotonic() is immune to wall-clock adjustments during the wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        conv = get_conversation(conv_id)
        messages = conv.get("messages") if conv else None
        if messages and len(messages) > last_msg_count:
            newest = messages[-1]
            if isinstance(newest, dict) and newest.get("role") == "assistant":
                return newest.get("content")
        # Never sleep past the deadline.
        time.sleep(max(0.0, min(poll_interval, deadline - time.monotonic())))
    return None


def get_llm_response(prompt, timeout=120):
    """Create a fresh conversation, send prompt, wait for reply.

    Returns the assistant's reply text, or None if any step fails or the
    reply does not arrive within *timeout* seconds.
    """
    conv_id = create_conversation()
    if not conv_id:
        return None
    # Take the baseline BEFORE sending: if it were taken afterwards, a reply
    # that arrives quickly would already be counted and never be seen as new.
    conv_data = get_conversation(conv_id)
    if conv_data is None:
        return None
    msg_count = len(conv_data.get("messages") or [])
    if not send_message(conv_id, prompt):
        return None
    print(" ⏳ Waiting for LLM response (up to {}s)...".format(timeout))
    return wait_for_assistant_reply(conv_id, msg_count, timeout=timeout)


def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence (``` or ```json) from *text*."""
    clean = text.strip()
    if clean.startswith("```"):
        clean = clean[3:]
        if clean[:4].lower() == "json":
            clean = clean[4:]
    if clean.endswith("```"):
        clean = clean[:-3]
    return clean.strip()


def _decode_json_object(text):
    """Decode *text* as JSON, tolerating chatter around a {...} object.

    Raises json.JSONDecodeError when no decodable JSON is found.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(text[start:end + 1])


def parse_single_idea(raw_json_str):
    """Parse LLM response into a dict; return None on failure.

    The response may be bare JSON, JSON inside a Markdown code fence, or JSON
    surrounded by extra text. The result must be an object containing
    "title", "explanation", "novelty" and "reference".
    """
    if not isinstance(raw_json_str, str):
        print(" ❌ Failed to parse JSON from LLM.")
        return None
    try:
        idea = _decode_json_object(_strip_code_fence(raw_json_str))
    except json.JSONDecodeError:
        print(" ❌ Failed to parse JSON from LLM.")
        print(" Raw response (first 500 chars):", raw_json_str[:500])
        return None
    if isinstance(idea, dict) and all(k in idea for k in _REQUIRED_IDEA_FIELDS):
        return idea
    print(" ❌ LLM response missing required fields.")
    return None


# =============================================================================
# 2. Search engine integration (no screenshots)
# =============================================================================
RESULT_LINK_SELECTOR = "a.result-link"
TITLE_SELECTOR = "h3"
URL_SELECTOR = "small.arabic-url"


def _ask_failure_choice(prompt_text):
    """Ask how to proceed after a failed search; return 'r', 's' or 'a'.

    Any unrecognised answer (or a closed stdin) counts as 's' (skip).
    """
    try:
        choice = input(prompt_text).strip().lower()
    except EOFError:
        return "s"
    return choice if choice in ("r", "a") else "s"


def _scrape_top_results(page, search_url, limit=3):
    """Load *search_url* in *page* and return up to *limit* (title, url) tuples."""
    page.goto(search_url, wait_until="domcontentloaded", timeout=15000)
    page.wait_for_selector(RESULT_LINK_SELECTOR, timeout=10000)
    results = []
    for link in page.query_selector_all(RESULT_LINK_SELECTOR)[:limit]:
        title_elem = link.query_selector(TITLE_SELECTOR)
        title = title_elem.inner_text() if title_elem else "(بدون عنوان)"
        url_elem = link.query_selector(URL_SELECTOR)
        # Prefer the data-url attribute; fall back to the link's own href when
        # the element or the attribute is missing.
        url = (url_elem.get_attribute("data-url") if url_elem else None) or link.get_attribute("href")
        results.append((title.strip(), url.strip() if url else "#"))
    return results


def search_mithal(query, retry_user_choice=True):
    """
    Perform a single search on mithal.space and return top 3 results as list of (title, url).
    If retry_user_choice is True, on failure ask user to retry or skip.

    Returns an empty list when the search fails and is skipped. Choosing
    "abort" at the prompt terminates the script via sys.exit(0). The browser
    is closed on every exit path.
    """
    search_url = f"https://mithal.space/search?q={quote(query, safe='')}"
    print(f" 🔍 Searching: {query}")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(locale="ar-SA", viewport={"width": 1280, "height": 900})
            page = context.new_page()

            while True:
                try:
                    return _scrape_top_results(page, search_url)
                except PlaywrightTimeoutError:
                    print(f" ⚠️ Search timed out for: {query}")
                    prompt_text = " Retry (r), Skip this term (s), or Abort whole script (a)? "
                except Exception as e:
                    # Top-level boundary for browser/page errors: report and let the user decide.
                    print(f" ❌ Error searching: {e}")
                    prompt_text = " Retry (r), Skip (s), or Abort (a)? "

                if not retry_user_choice:
                    return []
                choice = _ask_failure_choice(prompt_text)
                if choice == "a":
                    sys.exit(0)
                if choice == "s":
                    return []
                # choice == "r": loop and try the same URL again.
        finally:
            # Runs for return, sys.exit and unexpected errors alike.
            browser.close()
# =============================================================================
# 3. Search term generation for a given idea
# =============================================================================
def _parse_search_terms(reply):
    """Extract a list of non-empty query strings from an LLM reply.

    Accepts a bare JSON array, an array inside a Markdown code fence
    (``` or ```json, any case), or an array surrounded by extra text.

    Returns the cleaned list, or None when the JSON decodes but is not a
    non-empty array of strings. Raises ValueError when nothing decodable is found.
    """
    if not isinstance(reply, str):
        raise ValueError("LLM reply is not text")
    clean = reply.strip()
    if clean.startswith("```"):
        clean = clean[3:]
        if clean[:4].lower() == "json":
            clean = clean[4:]
    if clean.endswith("```"):
        clean = clean[:-3]
    clean = clean.strip()
    try:
        data = json.loads(clean)
    except json.JSONDecodeError:
        # The model sometimes adds prose around the array; retry on the
        # outermost [...] span before giving up.
        start, end = clean.find("["), clean.rfind("]")
        if start == -1 or end <= start:
            raise
        data = json.loads(clean[start:end + 1])
    if not isinstance(data, list) or not all(isinstance(t, str) for t in data):
        return None
    # Blank queries would trigger pointless searches, so drop them.
    terms = [t.strip() for t in data if t.strip()]
    return terms or None


def get_search_terms_for_idea(idea_title, idea_explanation, timeout=90, *, ask_llm=None):
    """Ask LLM to generate 3 Arabic search queries for a given idea.

    Args:
        idea_title: title of the idea, interpolated into the prompt.
        idea_explanation: explanation of the idea, interpolated into the prompt.
        timeout: seconds to wait for the LLM reply.
        ask_llm: optional callable ``(prompt, timeout=...) -> str | None`` used
            to query the model; defaults to this module's get_llm_response.

    Returns:
        A list of at most 3 query strings, or None if the LLM gave no reply
        or the reply could not be interpreted as a list of strings.
    """
    prompt = f"""Based on the following R&D idea:
Title: {idea_title}
Explanation: {idea_explanation}

Generate exactly 3 Arabic search queries (short phrases) that would help find related research, papers, or projects online.
Return ONLY a JSON array of strings, e.g. ["استعلام 1", "استعلام 2", "استعلام 3"].
No extra text."""
    # Resolve the default lazily so it is only looked up when no override is given.
    ask = get_llm_response if ask_llm is None else ask_llm
    reply = ask(prompt, timeout=timeout)
    if not reply:
        return None
    try:
        terms = _parse_search_terms(reply)
    except ValueError:  # includes json.JSONDecodeError
        print(f" ⚠️ Could not parse search terms for idea: {idea_title}")
        return None
    if terms is None:
        print(f" ⚠️ Invalid search terms format for idea: {idea_title}")
        return None
    return terms[:3]
# =============================================================================
# 4. Main interactive loop
# =============================================================================
def _append_markdown(out_file, text):
    """Append *text* to the report; reopening per write keeps the file current if the run is aborted."""
    with out_file.open("a", encoding="utf-8") as f:
        f.write(text)


def _md_link_text(text):
    """Make *text* safe as Markdown link text: single line, square brackets escaped."""
    single_line = " ".join(str(text).split())
    return single_line.replace("[", "\\[").replace("]", "\\]")


def _wants_retry():
    """Ask whether to try generating the idea again; only 'y' means yes."""
    return input("Try again? (y/n): ").strip().lower() == 'y'


def main():
    """Run the interactive session.

    Repeatedly asks the LLM for one idea, appends it to a timestamped
    Markdown report in the current directory, generates up to 3 search terms
    for it, searches each term and appends the results, then asks the user
    whether to continue.
    """
    print("="*70)
    print("INTERACTIVE R&D ASSISTANT with integrated search")
    print("Type 'stop' when asked for another idea, or press Ctrl+C to abort.")
    print("="*70)

    # Create output file with timestamp (one clock read, so the file name and
    # the header always agree).
    now = datetime.now()
    out_file = Path(f"rd_ideas_{now.strftime('%Y%m%d_%H%M%S')}.md")
    out_file.write_text(
        f"# R&D Ideas Report\n*Generated on {now.strftime('%Y-%m-%d %H:%M:%S')}*\n\n",
        encoding="utf-8",
    )

    # Number of ideas actually written; failed attempts do not consume a number.
    idea_counter = 0

    while True:
        idea_number = idea_counter + 1
        print(f"\n📡 Generating idea #{idea_number}...")
        raw_idea = get_llm_response(SINGLE_IDEA_PROMPT, timeout=120)
        if not raw_idea:
            print("❌ Failed to get idea from LLM. Do you want to try again?")
            if _wants_retry():
                continue
            break

        idea = parse_single_idea(raw_idea)
        if not idea:
            # Ask before retrying: retrying automatically could loop on LLM
            # calls indefinitely if the model keeps returning unusable output.
            print("❌ Could not parse idea. Skipping this one.")
            if _wants_retry():
                continue
            break

        idea_counter = idea_number
        title = idea.get("title", "بدون عنوان")
        explanation = idea.get("explanation", "")
        novelty = idea.get("novelty", "")
        reference = idea.get("reference", "")

        # Write idea to markdown file
        _append_markdown(
            out_file,
            f"## فكرة {idea_counter}: {title}\n\n"
            f"**الشرح:** {explanation}\n\n"
            f"**الجدة:** {novelty}\n\n"
            f"**مرجع:** {reference}\n\n",
        )

        print(f"\n✅ Idea {idea_counter}: {title}")
        print(" Generating 3 search terms (LLM)...")

        terms = get_search_terms_for_idea(title, explanation, timeout=90)
        if not terms:
            _append_markdown(out_file, "**❌ فشل في توليد مصطلحات البحث**\n\n---\n\n")
            print(" ⚠️ No search terms generated. Moving to next idea decision.\n")
        else:
            _append_markdown(out_file, "### مصطلحات البحث والنتائج\n\n")

            for term_idx, term in enumerate(terms, 1):
                print(f" Term {term_idx}: {term}")
                _append_markdown(out_file, f"#### مصطلح {term_idx}: `{term}`\n\n")

                results = search_mithal(term, retry_user_choice=True)
                if not results:
                    _append_markdown(out_file, "*لا توجد نتائج* \n\n")
                else:
                    lines = [
                        f"{rank}. [{_md_link_text(res_title)}]({res_url}) \n"
                        for rank, (res_title, res_url) in enumerate(results, 1)
                    ]
                    _append_markdown(out_file, "".join(lines) + "\n")

            _append_markdown(out_file, "---\n\n")

        # Ask if user wants another idea
        while True:
            again = input("\n🔁 Do you want another research idea? (y/n): ").strip().lower()
            if again in ('y', 'n'):
                break
            print("Please answer 'y' or 'n'.")
        if again == 'n':
            break

    print(f"\n🎉 Finished! Results saved to: {out_file.resolve()}")


if __name__ == "__main__":
    try:
        main()
    except (KeyboardInterrupt, EOFError):
        # EOFError: stdin was closed while a prompt was waiting for an answer.
        print("\n\n⚠️ User interrupted. Exiting gracefully.")
        sys.exit(0)