Skip to content

Commit 817ed3d

Browse files
authored
feat: Google scholar toolkit (#997)
1 parent 3978d47 commit 817ed3d

13 files changed

+692
-10
lines changed

camel/toolkits/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from .google_maps_toolkit import GoogleMapsToolkit
3131
from .code_execution import CodeExecutionToolkit
3232
from .github_toolkit import GithubToolkit
33+
from .google_scholar_toolkit import GoogleScholarToolkit
3334
from .arxiv_toolkit import ArxivToolkit
3435
from .linkedin_toolkit import LinkedInToolkit
3536
from .reddit_toolkit import RedditToolkit
@@ -58,6 +59,7 @@
5859
'LinkedInToolkit',
5960
'RedditToolkit',
6061
'CodeExecutionToolkit',
62+
'GoogleScholarToolkit',
6163
'ArxivToolkit',
6264
'MATH_FUNCS',
6365
'SEARCH_FUNCS',

camel/toolkits/arxiv_toolkit.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,13 @@ def download_papers(
142142
return f"An error occurred: {e}"
143143

144144
def get_tools(self) -> List[FunctionTool]:
145+
r"""Returns a list of FunctionTool objects representing the
146+
functions in the toolkit.
147+
148+
Returns:
149+
List[FunctionTool]: A list of FunctionTool objects
150+
representing the functions in the toolkit.
151+
"""
145152
return [
146153
FunctionTool(self.search_papers),
147154
FunctionTool(self.download_papers),
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
2+
# Licensed under the Apache License, Version 2.0 (the “License”);
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an “AS IS” BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14+
import re
15+
from typing import List, Optional
16+
17+
from camel.toolkits import FunctionTool
18+
from camel.toolkits.base import BaseToolkit
19+
20+
21+
class GoogleScholarToolkit(BaseToolkit):
    r"""A toolkit for retrieving information about authors and their
    publications from Google Scholar.

    Attributes:
        author_identifier (str): The author's Google Scholar URL or name
            of the author to search for.
        is_author_name (bool): Flag to indicate if the identifier is a name.
            (default: :obj:`False`)
        scholarly: The `scholarly` object imported from the `scholarly`
            package, used for every Google Scholar query.
    """

    def __init__(
        self, author_identifier: str, is_author_name: bool = False
    ) -> None:
        r"""Initializes the GoogleScholarToolkit with the author's identifier.

        Args:
            author_identifier (str): The author's Google Scholar URL or name
                of the author to search for.
            is_author_name (bool): Flag to indicate if the identifier is a
                name. (default: :obj:`False`)
        """
        # Imported inside the constructor, so the package is only needed
        # once a toolkit instance is actually created.
        from scholarly import scholarly

        self.scholarly = scholarly
        self.author_identifier = author_identifier
        self.is_author_name = is_author_name

    def _extract_author_id(self) -> Optional[str]:
        r"""Extracts the author ID from a Google Scholar URL if provided.

        The ID is the value of the `user=` query parameter of
        `author_identifier`.

        Returns:
            Optional[str]: The extracted author ID, or None if not found.
        """
        # `_` must be part of the character class: Scholar IDs may contain
        # both `-` and `_`, and omitting `_` silently truncates such IDs.
        match = re.search(r'user=([A-Za-z0-9_-]+)', self.author_identifier)
        return match.group(1) if match else None

    def get_author_detailed_info(
        self,
    ) -> dict:
        r"""Retrieves detailed information about the author.

        When `is_author_name` is set, the first result of a name search is
        used; otherwise the author is looked up by the ID extracted from
        the profile URL.

        Returns:
            dict: A dictionary containing detailed information about the
                author.

        Raises:
            ValueError: If the name search yields no result, or if no
                author ID can be extracted from `author_identifier`.
        """
        if self.is_author_name:
            search_query = self.scholarly.search_author(self.author_identifier)
            # Use a default so an empty search does not leak a bare
            # StopIteration to the caller.
            first_author_result = next(search_query, None)
            if first_author_result is None:
                raise ValueError(
                    "No Google Scholar author found for name: "
                    f"{self.author_identifier!r}"
                )
        else:
            author_id = self._extract_author_id()
            if author_id is None:
                raise ValueError(
                    "Could not extract an author ID from: "
                    f"{self.author_identifier!r}"
                )
            first_author_result = self.scholarly.search_author_id(id=author_id)

        author = self.scholarly.fill(first_author_result)
        return author

    def get_author_publications(
        self,
    ) -> List[str]:
        r"""Retrieves the titles of the author's publications.

        Returns:
            List[str]: A list of publication titles authored by the author.
        """
        author = self.get_author_detailed_info()
        publication_titles = [
            pub['bib']['title'] for pub in author['publications']
        ]
        return publication_titles

    def get_publication_by_title(
        self, publication_title: str
    ) -> Optional[dict]:
        r"""Retrieves detailed information about a specific publication by its
        title. Note that this method cannot retrieve the full content of the
        paper.

        The title is compared for exact equality against the titles listed
        on the author's profile.

        Args:
            publication_title (str): The title of the publication to search
                for.

        Returns:
            Optional[dict]: A dictionary containing detailed information about
                the publication if found; otherwise, `None`.
        """
        author = self.get_author_detailed_info()
        publications = author['publications']
        for publication in publications:
            if publication['bib']['title'] == publication_title:
                # Only the matching entry is filled with its details.
                return self.scholarly.fill(publication)
        return None  # Return None if not found

    def get_full_paper_content_by_link(self, pdf_url: str) -> Optional[str]:
        r"""Retrieves the full paper content from a given PDF URL using the
        arxiv2text tool.

        Args:
            pdf_url (str): The URL of the PDF file.

        Returns:
            Optional[str]: The full text extracted from the PDF, or `None` if
                an error occurs.
        """
        import logging

        from arxiv2text import arxiv_to_text

        try:
            return arxiv_to_text(pdf_url)
        except Exception as e:
            # Best-effort by contract (None on failure), but record the
            # reason instead of discarding it silently.
            logging.getLogger(__name__).warning(
                "Failed to extract text from %s: %s", pdf_url, e
            )
            return None

    def get_tools(self) -> List[FunctionTool]:
        r"""Returns a list of FunctionTool objects representing the
        functions in the toolkit.

        Returns:
            List[FunctionTool]: A list of FunctionTool objects
                representing the functions in the toolkit.
        """
        return [
            FunctionTool(self.get_author_detailed_info),
            FunctionTool(self.get_author_publications),
            FunctionTool(self.get_publication_by_title),
            FunctionTool(self.get_full_paper_content_by_link),
        ]

examples/test/test_ai_society_example.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
from mock import patch
1515

1616
import examples.ai_society.role_playing
17-
import examples.function_call.role_playing_with_functions
1817
import examples.models.role_playing_with_open_source_model
18+
import examples.tool_call.role_playing_with_functions
1919
from camel.models import ModelFactory
2020
from camel.types import ModelPlatformType, ModelType
2121

@@ -35,9 +35,7 @@ def test_ai_society_role_playing_example():
3535

3636
def test_role_playing_with_function_example():
3737
with patch('time.sleep', return_value=None):
38-
examples.function_call.role_playing_with_functions.main(
39-
chat_turn_limit=2
40-
)
38+
examples.tool_call.role_playing_with_functions.main(chat_turn_limit=2)
4139

4240

4341
def test_role_playing_with_open_source_model():
File renamed without changes.
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
2+
# Licensed under the Apache License, Version 2.0 (the “License”);
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an “AS IS” BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14+
15+
from camel.agents import ChatAgent
16+
from camel.configs.openai_config import ChatGPTConfig
17+
from camel.messages import BaseMessage
18+
from camel.models import ModelFactory
19+
from camel.toolkits import GoogleScholarToolkit
20+
from camel.types import ModelPlatformType, ModelType
21+
22+
# Define system message
23+
sys_msg = BaseMessage.make_assistant_message(
    role_name="Tools calling operator", content="You are a helpful assistant"
)

# Build the tool list from a toolkit bound to one author's profile URL
tools = GoogleScholarToolkit(
    author_identifier="https://scholar.google.com/citations?user=JicYPdAAAAAJ&hl=en&oi=ao"
).get_tools()

# Set model config
model_config_dict = ChatGPTConfig(
    temperature=0.0,
).as_dict()

model = ModelFactory.create(
    model_platform=ModelPlatformType.OPENAI,
    model_type=ModelType.GPT_4O_MINI,
    model_config_dict=model_config_dict,
)

# Set agent
camel_agent = ChatAgent(
    system_message=sys_msg,
    model=model,
    tools=tools,
)
camel_agent.reset()
48+
49+
# Define a user message
50+
usr_msg = BaseMessage.make_user_message(
    content="get the detailed information of this author",
    role_name="CAMEL User",
)

# Run one agent step, then show the first 1000 characters of the
# recorded tool calls
response = camel_agent.step(usr_msg)
print(str(response.info['tool_calls'])[:1000])
58+
"""
59+
===============================================================================
60+
[FunctionCallingRecord(func_name='get_author_detailed_info', args={}, result=
61+
{'container_type': 'Author', 'filled': ['basics', 'indices', 'counts',
62+
'coauthors', 'publications', 'public_access'], 'scholar_id': 'JicYPdAAAAAJ',
63+
'source': <AuthorSource.AUTHOR_PROFILE_PAGE: 'AUTHOR_PROFILE_PAGE'>, 'name':
64+
'Geoffrey Hinton', 'url_picture': 'https://scholar.googleusercontent.com/
65+
citations?view_op=view_photo&user=JicYPdAAAAAJ&citpid=2', 'affiliation':
66+
'Emeritus Prof. Computer Science, University of Toronto', 'organization':
67+
8515235176732148308, 'interests': ['machine learning', 'psychology',
68+
'artificial intelligence', 'cognitive science', 'computer science'],
69+
'email_domain': '@cs.toronto.edu', 'homepage': 'http://www.cs.toronto.edu/
70+
~hinton', 'citedby': 853541, 'citedby5y': 560063, 'hindex': 186, 'hindex5y':
71+
137, 'i10index': 483, 'i10index5y': 368, 'cites_per_year': {1989: 2627, 1990:
72+
3589, 1991: 3766, 1992: 4091, 1993: 4573, 1994: 4499, 1995: 4090, 1996: 3935,
73+
1997: 3740, 1998: 3744, 1999: 3559, 2000: 3292, 2001: 3398, 2002: 3713, 2003:
74+
3670, 2004: 3393, 2005: 3813, 2006: 4168, 2007: 4558, 2008: 4349, 2009: 4784,
75+
2010: 5238, 2011: 5722, 2012: 6746, 2013: 9900, 2014: 12751, 2015: 18999,
76+
2016: 29932, 2017: 43675, 2018: 63544, 2019: 80800, 2020: 90523, 2021: 101735,
77+
2022: 104036, 2023: 106452, 2024: 76413}, 'coauthors': [{'container_type':
78+
'Author', 'filled': [], 'scholar_id': 'm1qAiOUAAAAJ', 'source': <AuthorSource.
79+
CO_AUTHORS_LIST: 'CO_AUTHORS_LIST'>, 'name': 'Terrence Sejnowski',
80+
'affiliation': 'Francis Crick Professor, Salk Institute, Distingished
81+
Professor, UC San Diego'}, {'container_type': 'Author', 'filled': [],
82+
'scholar_id': 'RnoIxUwAAAAJ', 'source': <AuthorSource.CO_AUTHORS_LIST:
83+
'CO_AUTHORS_LIST'>, 'name': 'Vinod Nair', 'affiliation': 'Research Scientist,
84+
DeepMind'}, {'container_type': 'Author', 'filled': [], 'scholar_id':
85+
'ghbWy-0AAAAJ', 'source': <AuthorSource.CO_AUTHORS_LIST: 'CO_AUTHORS_LIST'>,
86+
'name': 'George E. Dahl', 'affiliation': 'Google Inc.'}, {'container_
87+
===============================================================================
88+
"""
89+
90+
# Define a user message
91+
usr_msg = BaseMessage.make_user_message(
    content="get the publications of this author",
    role_name="CAMEL User",
)

# Run one agent step, then show the first 1000 characters of the
# recorded tool calls
response = camel_agent.step(usr_msg)
print(str(response.info['tool_calls'])[:1000])
98+
"""
99+
===============================================================================
100+
[FunctionCallingRecord(func_name='get_author_publications', args={}, result=
101+
['Imagenet classification with deep convolutional neural networks', 'Deep
102+
learning', 'Learning internal representations by error-propagation', 'Dropout:
103+
a simple way to prevent neural networks from overfitting', 'Visualizing data
104+
using t-SNE', 'Learning representations by back-propagating errors', 'Learning
105+
multiple layers of features from tiny images', 'Rectified linear units improve
106+
restricted boltzmann machines', 'Reducing the dimensionality of data with
107+
neural networks', 'A fast learning algorithm for deep belief nets',
108+
'Distilling the Knowledge in a Neural Network', 'A simple framework for
109+
contrastive learning of visual representations', 'Deep neural networks for
110+
acoustic modeling in speech recognition: The shared views of four research
111+
groups', 'Layer normalization', 'Speech recognition with deep recurrent neural
112+
networks', 'Improving neural networks by preventing co-adaptation of feature
113+
detectors', 'Lec
114+
===============================================================================
115+
"""
116+
117+
# ruff: noqa: E501
118+
# Define a user message
119+
usr_msg = BaseMessage.make_user_message(
    content="""get the detailed information for publication with title: `Camel: Communicative agents for" mind" exploration of large language model society`""",
    role_name="CAMEL User",
)

# Run one agent step and print the recorded tool calls in full
response = camel_agent.step(usr_msg)
print(response.info['tool_calls'])
127+
"""
128+
===============================================================================
129+
[FunctionCallingRecord(func_name='get_publication_by_title', args=
130+
{'publication_title': 'Camel: Communicative agents for" mind" exploration of
131+
large language model society'}, result={'container_type': 'Publication',
132+
'source': <PublicationSource.AUTHOR_PUBLICATION_ENTRY:
133+
'AUTHOR_PUBLICATION_ENTRY'>, 'bib': {'title': 'Camel: Communicative agents
134+
for" mind" exploration of large language model society', 'pub_year': 2023,
135+
'citation': 'Advances in Neural Information Processing Systems 36, 2023',
136+
'author': 'Guohao Li and Hasan Hammoud and Hani Itani and Dmitrii Khizbullin
137+
and Bernard Ghanem', 'journal': 'Advances in Neural Information Processing
138+
Systems', 'volume': '36', 'abstract': 'The rapid advancement of chat-based
139+
language models has led to remarkable progress in complex task-solving.
140+
However, their success heavily relies on human input to guide the
141+
conversation, which can be challenging and time-consuming. This paper explores
142+
the potential of building scalable techniques to facilitate autonomous
143+
cooperation among communicative agents, and provides insight into their
144+
“cognitive” processes. To address the challenges of achieving autonomous
145+
cooperation, we propose a novel communicative agent framework named
146+
role-playing. Our approach involves using inception prompting to guide chat
147+
agents toward task completion while maintaining consistency with human
148+
intentions. We showcase how role-playing can be used to generate
149+
conversational data for studying the behaviors and capabilities of a society
150+
of agents, providing a valuable resource for investigating conversational
151+
language models. In particular, we conduct comprehensive studies on
152+
instruction-following cooperation in multi-agent settings. Our contributions
153+
include introducing a novel communicative agent framework, offering a scalable
154+
approach for studying the cooperative behaviors and capabilities of
155+
multi-agent systems, and open-sourcing our library to support research on
156+
communicative agents and beyond: https://github. com/camel-ai/camel.'},
157+
'filled': True, 'author_pub_id': 'J9K-D0sAAAAJ:_Qo2XoVZTnwC', 'num_citations':
158+
364, 'citedby_url': '/scholar?hl=en&cites=3976259482297250805', 'cites_id':
159+
['3976259482297250805'], 'pub_url': 'https://proceedings.neurips.cc/
160+
paper_files/paper/2023/hash/
161+
a3621ee907def47c1b952ade25c67698-Abstract-Conference.html',
162+
'url_related_articles': '/scholar?oi=bibs&hl=en&q=related:9TMbme6CLjcJ:scholar.
163+
google.com/', 'cites_per_year': {2023: 95, 2024: 269}})]
164+
===============================================================================
165+
"""
166+
167+
usr_msg = BaseMessage.make_user_message(
    role_name="CAMEL User",
    content="""get the full information for paper from link: `https://hal.science/hal-04206682/document`""",
)

# Get response information
response = camel_agent.step(usr_msg)
# Convert to text before slicing: slicing the tool-call list itself would
# keep every record and print the whole extracted paper instead of a
# 1000-character preview.
print(str(response.info['tool_calls'])[:1000])
175+
"""
176+
===============================================================================
177+
[FunctionCallingRecord(func_name='get_full_paper_content_by_link', args=
178+
{'pdf_url': 'https://hal.science/hal-04206682/document'}, result='Deep
179+
learning\nYann Lecun, Yoshua Bengio, Geoffrey Hinton\n\nTo cite this
180+
version:\n\nYann Lecun, Yoshua Bengio, Geoffrey Hinton. Deep learning. Nature,
181+
2015, 521 (7553), pp.436-444.\n\uffff10.1038/nature14539\uffff.
182+
\uffffhal-04206682\uffff\n\nHAL Id: hal-04206682\n\nhttps://hal.science/
183+
hal-04206682v1\n\nSubmitted on 14 Sep 2023\n\nHAL is a multi-disciplinary open
184+
access\narchive for the deposit and dissemination of sci-\nentific research
185+
documents, whether they are pub-\nlished or not. The documents may come
186+
from\nteaching and research institutions in France or\nabroad, or from public
187+
or private research centers.\n\nL'archive ouverte pluridisciplinaire HAL,
188+
est\ndestinée au dépôt et à la diffusion de documents\nscientifiques de niveau
189+
recherche, publiés ou non,\némanant des établissements d'enseignement et
190+
de\nrecherche français ou étrangers, des laboratoires\npublics ou privés.
191+
\n\n\x0cDeep learning\n\nYann LeCun1,2, Yoshua Bengio3 & Geoffrey Hinton4,
192+
5\n\n1Facebook AI Research, 770 Broadway, New York, New York 10003 USA\n\n2N..
193+
===============================================================================
194+
"""
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)