Skill Index

InnoClaw/

protocol-extraction-from-pdf

community[skill]

Extract laboratory protocols from PDF documents using Thoth-Plan to convert experimental procedures into structured text.

$/plugin install InnoClaw

details

Protocol Extraction from PDF

Usage

import asyncio
import json
from contextlib import AsyncExitStack
from mcp.client.streamable_http import streamablehttp_client
from mcp import ClientSession

class ThothClient:
    """Small async wrapper around an MCP ``ClientSession`` over streamable HTTP.

    Call ``await connect()`` first; on success ``self.session`` holds the
    initialized session used to call tools. Call ``await disconnect()`` when
    done to close the session and the transport.
    """

    def __init__(self, server_url: str):
        self.server_url = server_url
        # Only set to a live session once connect() has fully succeeded.
        self.session = None

    async def connect(self):
        """Open the transport and initialize the MCP session.

        Returns:
            True when the session was initialized, False on any failure.
            On failure the error is printed, everything opened so far is
            closed again, and ``self.session`` stays ``None``.
        """
        # The stack owns both the transport and the session so that a single
        # aclose() tears them down in reverse order.
        self._stack = AsyncExitStack()
        try:
            self.transport = streamablehttp_client(url=self.server_url, sse_read_timeout=60 * 10)
            self.read, self.write, self.get_session_id = await self._stack.enter_async_context(self.transport)
            self.session_ctx = ClientSession(self.read, self.write)
            session = await self._stack.enter_async_context(self.session_ctx)
            await session.initialize()
        except Exception as e:
            # Report the cause instead of silently returning False.
            print(f"✗ connect error: {e}")
            self.session = None
            # Release whatever was already entered so a failed connect does
            # not leave a half-open transport behind.
            try:
                await self._stack.aclose()
            except Exception as cleanup_error:
                print(f"✗ disconnect error: {cleanup_error}")
            return False
        # Publish the session only after initialize() succeeded.
        self.session = session
        return True

    async def disconnect(self):
        """Disconnect from server"""
        try:
            stack = getattr(self, "_stack", None)
            if stack is not None:
                await stack.aclose()
            print("✓ already disconnect")
        except Exception as e:
            print(f"✗ disconnect error: {e}")
        finally:
            # The session is unusable once its stack has been closed.
            self.session = None

    def parse_result(self, result):
        """Convert a tool-call result into a plain Python value.

        Returns:
            * the decoded JSON when the first content item has JSON ``text``;
            * the raw ``text`` when it is not valid JSON;
            * ``str(result)`` when there is no content or no ``text``;
            * ``{"error": "parse error", "raw": ...}`` when nothing else works.
        """
        try:
            if hasattr(result, 'content') and result.content:
                first = result.content[0]
                if hasattr(first, 'text'):
                    return json.loads(first.text)
            return str(result)
        except Exception:
            # Best-effort fallback; ``Exception`` (not a bare except) so that
            # KeyboardInterrupt / SystemExit still propagate.
            try:
                return result.content[0].text
            except Exception:
                return {"error": "parse error", "raw": str(result)}

## Initialize and use
SERVER_URL = "https://scp.intern-ai.org.cn/api/v1/mcp/19/Thoth-Plan"


async def main():
    """Connect to Thoth-Plan, extract the protocol from one PDF, and print it."""
    client = ThothClient(SERVER_URL)
    # connect() reports failure by returning False; stop here rather than
    # failing later on ``client.session`` being None.
    if not await client.connect():
        raise ConnectionError(f"could not connect to {SERVER_URL}")

    try:
        # PDF URL (must be publicly accessible)
        pdf_url = "https://example.com/protocol.pdf"

        result = await client.session.call_tool("extract_protocol_from_pdf", arguments={"pdf_url": pdf_url})
        protocol = client.parse_result(result)
        print("Extracted Protocol:")
        print(protocol)
    finally:
        # Always release the connection, even when the tool call fails.
        await client.disconnect()


# ``await`` is only valid inside a coroutine, so drive the example through
# the event loop when the file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())

Tool: extract_protocol_from_pdf

  • Args: pdf_url (str) - Public URL to PDF file
  • Returns: Structured protocol text with numbered steps

Use Cases

  • Lab protocol digitization, SOP extraction, experimental workflow documentation

technical

github
SpectrAI-Initiative/InnoClaw
stars
374
license
Apache-2.0
contributors
16
last commit
2026-04-20T01:27:21Z
file
.claude/skills/protocol-extraction-from-pdf/SKILL.md

related