indra-ipd commited on Oct 29, 2024

Commit

b9fab8d

1 Parent(s): 197c331

add model checkpoint

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

__init__.py +0 -5
__pycache__/__init__.cpython-310.pyc +0 -0
__pycache__/load.cpython-310.pyc +0 -0
graph_grammar/.DS_Store +0 -0
graph_grammar/__init__.py +0 -19
graph_grammar/__pycache__/__init__.cpython-310.pyc +0 -0
graph_grammar/__pycache__/hypergraph.cpython-310.pyc +0 -0
graph_grammar/algo/__init__.py +0 -20
graph_grammar/algo/__pycache__/__init__.cpython-310.pyc +0 -0
graph_grammar/algo/__pycache__/tree_decomposition.cpython-310.pyc +0 -0
graph_grammar/algo/tree_decomposition.py +0 -821
graph_grammar/graph_grammar/__init__.py +0 -20
graph_grammar/graph_grammar/__pycache__/__init__.cpython-310.pyc +0 -0
graph_grammar/graph_grammar/__pycache__/base.cpython-310.pyc +0 -0
graph_grammar/graph_grammar/__pycache__/corpus.cpython-310.pyc +0 -0
graph_grammar/graph_grammar/__pycache__/hrg.cpython-310.pyc +0 -0
graph_grammar/graph_grammar/__pycache__/symbols.cpython-310.pyc +0 -0
graph_grammar/graph_grammar/__pycache__/utils.cpython-310.pyc +0 -0
graph_grammar/graph_grammar/base.py +0 -30
graph_grammar/graph_grammar/corpus.py +0 -152
graph_grammar/graph_grammar/hrg.py +0 -1065
graph_grammar/graph_grammar/symbols.py +0 -180
graph_grammar/graph_grammar/utils.py +0 -130
graph_grammar/hypergraph.py +0 -544
graph_grammar/io/__init__.py +0 -20
graph_grammar/io/__pycache__/__init__.cpython-310.pyc +0 -0
graph_grammar/io/__pycache__/smi.cpython-310.pyc +0 -0
graph_grammar/io/smi.py +0 -559
graph_grammar/nn/__init__.py +0 -11
graph_grammar/nn/__pycache__/__init__.cpython-310.pyc +0 -0
graph_grammar/nn/__pycache__/decoder.cpython-310.pyc +0 -0
graph_grammar/nn/__pycache__/encoder.cpython-310.pyc +0 -0
graph_grammar/nn/dataset.py +0 -121
graph_grammar/nn/decoder.py +0 -158
graph_grammar/nn/encoder.py +0 -199
graph_grammar/nn/graph.py +0 -313
load.py +0 -83
mhg_gnn.egg-info/PKG-INFO +0 -102
mhg_gnn.egg-info/SOURCES.txt +0 -46
mhg_gnn.egg-info/dependency_links.txt +0 -1
mhg_gnn.egg-info/requires.txt +0 -7
mhg_gnn.egg-info/top_level.txt +0 -2
pickles/mhggnn_pretrained_model_0724_2023.pickle → mhggnn_pretrained_model_0724_2023.pickle +0 -0
models/__init__.py +0 -5
models/__pycache__/__init__.cpython-310.pyc +0 -0
models/__pycache__/mhgvae.cpython-310.pyc +0 -0
models/mhgvae.py +0 -956
notebooks/mhg-gnn_encoder_decoder_example.ipynb +0 -114
paper/MHG-GNN_Combination of Molecular Hypergraph Grammar with Graph Neural Network.pdf +0 -0
pickles/.DS_Store +0 -0

__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-# -*- coding:utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#

__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (214 Bytes)

__pycache__/load.cpython-310.pyc DELETED Viewed

Binary file (3.04 kB)

graph_grammar/.DS_Store DELETED Viewed

Binary file (8.2 kB)

graph_grammar/__init__.py DELETED Viewed

@@ -1,19 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Jan 1 2018"

graph_grammar/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (666 Bytes)

graph_grammar/__pycache__/hypergraph.cpython-310.pyc DELETED Viewed

Binary file (15.3 kB)

graph_grammar/algo/__init__.py DELETED Viewed

@@ -1,20 +0,0 @@
-#!/usr/bin/env python
-# -*- coding:utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Jan 1 2018"

graph_grammar/algo/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (659 Bytes)

graph_grammar/algo/__pycache__/tree_decomposition.cpython-310.pyc DELETED Viewed

Binary file (19.5 kB)

graph_grammar/algo/tree_decomposition.py DELETED Viewed

@@ -1,821 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2017"
-__version__ = "0.1"
-__date__ = "Dec 11 2017"
-from copy import deepcopy
-from itertools import combinations
-from ..hypergraph import Hypergraph
-import networkx as nx
-import numpy as np
-class CliqueTree(nx.Graph):
-    ''' clique tree object
-    Attributes
-    ----------
-    hg : Hypergraph
-        This hypergraph will be decomposed.
-    root_hg : Hypergraph
-        Hypergraph on the root node.
-    ident_node_dict : dict
-        ident_node_dict[key_node] gives a list of nodes that are identical (i.e., the adjacent hyperedges are common)
-    '''
-    def __init__(self, hg=None, **kwargs):
-        self.hg = deepcopy(hg)
-        if self.hg is not None:
-            self.ident_node_dict = self.hg.get_identical_node_dict()
-        else:
-            self.ident_node_dict = {}
-        super().__init__(**kwargs)
-    @property
-    def root_hg(self):
-        ''' return the hypergraph on the root node
-        '''
-        return self.nodes[0]['subhg']
-    @root_hg.setter
-    def root_hg(self, hypergraph):
-        ''' set the hypergraph on the root node
-        '''
-        self.nodes[0]['subhg'] = hypergraph
-    def insert_subhg(self, subhypergraph: Hypergraph) -> None:
-        ''' insert a subhypergraph, which is extracted from a root hypergraph, into the tree.
-        Parameters
-        ----------
-        subhg : Hypergraph
-        '''
-        num_nodes = self.number_of_nodes()
-        self.add_node(num_nodes, subhg=subhypergraph)
-        self.add_edge(num_nodes, 0)
-        adj_nodes = deepcopy(list(self.adj[0].keys()))
-        for each_node in adj_nodes:
-            if len(self.nodes[each_node]["subhg"].nodes.intersection(
-                    self.nodes[num_nodes]["subhg"].nodes)\
-                   - self.root_hg.nodes) != 0 and each_node != num_nodes:
-                self.remove_edge(0, each_node)
-                self.add_edge(each_node, num_nodes)
-    def to_irredundant(self) -> None:
-        ''' convert the clique tree to be irredundant
-        '''
-        for each_node in self.hg.nodes:
-            subtree = self.subgraph([
-                each_tree_node for each_tree_node in self.nodes()\
-                if each_node in self.nodes[each_tree_node]["subhg"].nodes]).copy()
-            leaf_node_list = [x for x in subtree.nodes() if subtree.degree(x)==1]
-            redundant_leaf_node_list = []
-            for each_leaf_node in leaf_node_list:
-                if len(self.nodes[each_leaf_node]["subhg"].adj_edges(each_node)) == 0:
-                    redundant_leaf_node_list.append(each_leaf_node)
-            for each_red_leaf_node in redundant_leaf_node_list:
-                current_node = each_red_leaf_node
-                while subtree.degree(current_node) == 1 \
-                      and len(subtree.nodes[current_node]["subhg"].adj_edges(each_node)) == 0:
-                    self.nodes[current_node]["subhg"].remove_node(each_node)
-                    remove_node = current_node
-                    current_node = list(dict(subtree[remove_node]).keys())[0]
-                    subtree.remove_node(remove_node)
-        fixed_node_set = deepcopy(self.nodes)
-        for each_node in fixed_node_set:
-            if self.nodes[each_node]["subhg"].num_edges == 0:
-                if len(self[each_node]) == 1:
-                    self.remove_node(each_node)
-                elif len(self[each_node]) == 2:
-                    self.add_edge(*self[each_node])
-                    self.remove_node(each_node)
-                else:
-                    pass
-            else:
-                pass
-        redundant = True
-        while redundant:
-            redundant = False
-            fixed_edge_set = deepcopy(self.edges)
-            remove_node_set = set()
-            for node_1, node_2 in fixed_edge_set:
-                if node_1 in remove_node_set or node_2 in remove_node_set:
-                    pass
-                else:
-                    if self.nodes[node_1]['subhg'].is_subhg(self.nodes[node_2]['subhg']):
-                        redundant = True
-                        adj_node_list = set(self.adj[node_1]) - {node_2}
-                        self.remove_node(node_1)
-                        remove_node_set.add(node_1)
-                        for each_node in adj_node_list:
-                            self.add_edge(node_2, each_node)
-                    elif self.nodes[node_2]['subhg'].is_subhg(self.nodes[node_1]['subhg']):
-                        redundant = True
-                        adj_node_list = set(self.adj[node_2]) - {node_1}
-                        self.remove_node(node_2)
-                        remove_node_set.add(node_2)
-                        for each_node in adj_node_list:
-                            self.add_edge(node_1, each_node)
-    def node_update(self, key_node: str, subhg) -> None:
-        """ given a pair of a hypergraph, H, and its subhypergraph, sH, return a hypergraph H\sH.
-        Parameters
-        ----------
-        key_node : str
-            key node that must be removed.
-        subhg : Hypegraph
-        """
-        for each_edge in subhg.edges:
-            self.root_hg.remove_edge(each_edge)
-        self.root_hg.remove_nodes(self.ident_node_dict[key_node])
-        adj_node_list = list(subhg.nodes)
-        for each_node in subhg.nodes:
-            if each_node not in self.ident_node_dict[key_node]:
-                if set(self.root_hg.adj_edges(each_node)).issubset(subhg.edges):
-                    self.root_hg.remove_node(each_node)
-                    adj_node_list.remove(each_node)
-            else:
-                adj_node_list.remove(each_node)
-        for each_node_1, each_node_2 in combinations(adj_node_list, 2):
-            if not self.root_hg.is_adj(each_node_1, each_node_2):
-                self.root_hg.add_edge(set([each_node_1, each_node_2]), attr_dict=dict(tmp=True))
-        subhg.remove_edges_with_attr({'tmp' : True})
-        self.insert_subhg(subhg)
-    def update(self, subhg, remove_nodes=False):
-        """ given a pair of a hypergraph, H, and its subhypergraph, sH, return a hypergraph H\sH.
-        Parameters
-        ----------
-        subhg : Hypegraph
-        """
-        for each_edge in subhg.edges:
-            self.root_hg.remove_edge(each_edge)
-        if remove_nodes:
-            remove_edge_list = []
-            for each_edge in self.root_hg.edges:
-                if set(self.root_hg.nodes_in_edge(each_edge)).issubset(subhg.nodes)\
-                   and self.root_hg.edge_attr(each_edge).get('tmp', False):
-                    remove_edge_list.append(each_edge)
-            self.root_hg.remove_edges(remove_edge_list)
-        adj_node_list = list(subhg.nodes)
-        for each_node in subhg.nodes:
-            if self.root_hg.degree(each_node) == 0:
-                self.root_hg.remove_node(each_node)
-                adj_node_list.remove(each_node)
-        if len(adj_node_list) != 1 and not remove_nodes:
-            self.root_hg.add_edge(set(adj_node_list), attr_dict=dict(tmp=True))
-        '''
-        else:
-            for each_node_1, each_node_2 in combinations(adj_node_list, 2):
-                if not self.root_hg.is_adj(each_node_1, each_node_2):
-                    self.root_hg.add_edge(
-                        [each_node_1, each_node_2], attr_dict=dict(tmp=True))
-        '''
-        subhg.remove_edges_with_attr({'tmp':True})
-        self.insert_subhg(subhg)
-def _get_min_deg_node(hg, ident_node_dict: dict, mode='mol'):
-    if mode == 'standard':
-        degree_dict = hg.degrees()
-        min_deg_node = min(degree_dict, key=degree_dict.get)
-        min_deg_subhg = hg.adj_subhg(min_deg_node, ident_node_dict)
-        return min_deg_node, min_deg_subhg
-    elif mode == 'mol':
-        degree_dict = hg.degrees()
-        min_deg = min(degree_dict.values())
-        min_deg_node_list = [each_node for each_node in hg.nodes if degree_dict[each_node]==min_deg]
-        min_deg_subhg_list = [hg.adj_subhg(each_min_deg_node, ident_node_dict)
-                              for each_min_deg_node in min_deg_node_list]
-        best_score = np.inf
-        best_idx = -1
-        for each_idx in range(len(min_deg_subhg_list)):
-            if min_deg_subhg_list[each_idx].num_nodes < best_score:
-                best_idx = each_idx
-        return min_deg_node_list[each_idx], min_deg_subhg_list[each_idx]
-    else:
-        raise ValueError
-def tree_decomposition(hg, irredundant=True):
-    """ compute a tree decomposition of the input hypergraph
-    Parameters
-    ----------
-    hg : Hypergraph
-        hypergraph to be decomposed
-    irredundant : bool
-        if True, irredundant tree decomposition will be computed.
-    Returns
-    -------
-    clique_tree : nx.Graph
-        each node contains a subhypergraph of `hg`
-    """
-    org_hg = hg.copy()
-    ident_node_dict = hg.get_identical_node_dict()
-    clique_tree = CliqueTree(org_hg)
-    clique_tree.add_node(0, subhg=org_hg)
-    while True:
-        degree_dict = org_hg.degrees()
-        min_deg_node = min(degree_dict, key=degree_dict.get)
-        min_deg_subhg = org_hg.adj_subhg(min_deg_node, ident_node_dict)
-        if org_hg.nodes == min_deg_subhg.nodes:
-            break
-        # org_hg and min_deg_subhg are divided
-        clique_tree.node_update(min_deg_node, min_deg_subhg)
-    clique_tree.root_hg.remove_edges_with_attr({'tmp' : True})
-    if irredundant:
-        clique_tree.to_irredundant()
-    return clique_tree
-def tree_decomposition_with_hrg(hg, hrg, irredundant=True, return_root=False):
-    ''' compute a tree decomposition given a hyperedge replacement grammar.
-    the resultant clique tree should induce a less compact HRG.
-    Parameters
-    ----------
-    hg : Hypergraph
-        hypergraph to be decomposed
-    hrg : HyperedgeReplacementGrammar
-        current HRG
-    irredundant : bool
-        if True, irredundant tree decomposition will be computed.
-    Returns
-    -------
-    clique_tree : nx.Graph
-        each node contains a subhypergraph of `hg`
-    '''
-    org_hg = hg.copy()
-    ident_node_dict = hg.get_identical_node_dict()
-    clique_tree = CliqueTree(org_hg)
-    clique_tree.add_node(0, subhg=org_hg)
-    root_node = 0
-    # construct a clique tree using HRG
-    success_any = True
-    while success_any:
-        success_any = False
-        for each_prod_rule in hrg.prod_rule_list:
-            org_hg, success, subhg = each_prod_rule.revert(org_hg, True)
-            if success:
-                if each_prod_rule.is_start_rule: root_node = clique_tree.number_of_nodes()
-                success_any = True
-                subhg.remove_edges_with_attr({'terminal' : False})
-                clique_tree.root_hg = org_hg
-                clique_tree.insert_subhg(subhg)
-    clique_tree.root_hg = org_hg
-    for each_edge in deepcopy(org_hg.edges):
-        if not org_hg.edge_attr(each_edge)['terminal']:
-            node_list = org_hg.nodes_in_edge(each_edge)
-            org_hg.remove_edge(each_edge)
-            for each_node_1, each_node_2 in combinations(node_list, 2):
-                if not org_hg.is_adj(each_node_1, each_node_2):
-                    org_hg.add_edge([each_node_1, each_node_2], attr_dict=dict(tmp=True))
-    # construct a clique tree using the existing algorithm
-    degree_dict = org_hg.degrees()
-    if degree_dict:
-        while True:
-            min_deg_node, min_deg_subhg = _get_min_deg_node(org_hg, ident_node_dict)
-            if org_hg.nodes == min_deg_subhg.nodes: break
-            # org_hg and min_deg_subhg are divided
-            clique_tree.node_update(min_deg_node, min_deg_subhg)
-    clique_tree.root_hg.remove_edges_with_attr({'tmp' : True})
-    if irredundant:
-        clique_tree.to_irredundant()
-    if return_root:
-        if root_node == 0 and 0 not in clique_tree.nodes:
-            root_node = clique_tree.number_of_nodes()
-            while root_node not in clique_tree.nodes:
-                root_node -= 1
-        elif root_node not in clique_tree.nodes:
-            while root_node not in clique_tree.nodes:
-                root_node -= 1
-        else:
-            pass
-        return clique_tree, root_node
-    else:
-        return clique_tree
-def tree_decomposition_from_leaf(hg, irredundant=True):
-    """ compute a tree decomposition of the input hypergraph
-    Parameters
-    ----------
-    hg : Hypergraph
-        hypergraph to be decomposed
-    irredundant : bool
-        if True, irredundant tree decomposition will be computed.
-    Returns
-    -------
-    clique_tree : nx.Graph
-        each node contains a subhypergraph of `hg`
-    """
-    def apply_normal_decomposition(clique_tree):
-        degree_dict = clique_tree.root_hg.degrees()
-        min_deg_node = min(degree_dict, key=degree_dict.get)
-        min_deg_subhg = clique_tree.root_hg.adj_subhg(min_deg_node, clique_tree.ident_node_dict)
-        if clique_tree.root_hg.nodes == min_deg_subhg.nodes:
-            return clique_tree, False
-        clique_tree.node_update(min_deg_node, min_deg_subhg)
-        return clique_tree, True
-    def apply_min_edge_deg_decomposition(clique_tree):
-        edge_degree_dict = clique_tree.root_hg.edge_degrees()
-        non_tmp_edge_list = [each_edge for each_edge in clique_tree.root_hg.edges \
-                             if not clique_tree.root_hg.edge_attr(each_edge).get('tmp')]
-        if not non_tmp_edge_list:
-            return clique_tree, False
-        min_deg_edge = None
-        min_deg = np.inf
-        for each_edge in non_tmp_edge_list:
-            if min_deg > edge_degree_dict[each_edge]:
-                min_deg_edge = each_edge
-                min_deg = edge_degree_dict[each_edge]
-        node_list = clique_tree.root_hg.nodes_in_edge(min_deg_edge)
-        min_deg_subhg = clique_tree.root_hg.get_subhg(
-            node_list, [min_deg_edge], clique_tree.ident_node_dict)
-        if clique_tree.root_hg.nodes == min_deg_subhg.nodes:
-            return clique_tree, False
-        clique_tree.update(min_deg_subhg)
-        return clique_tree, True
-    org_hg = hg.copy()
-    clique_tree = CliqueTree(org_hg)
-    clique_tree.add_node(0, subhg=org_hg)
-    success = True
-    while success:
-        clique_tree, success = apply_min_edge_deg_decomposition(clique_tree)
-        if not success:
-            clique_tree, success = apply_normal_decomposition(clique_tree)
-    clique_tree.root_hg.remove_edges_with_attr({'tmp' : True})
-    if irredundant:
-        clique_tree.to_irredundant()
-    return clique_tree
-def topological_tree_decomposition(
-        hg, irredundant=True, rip_labels=True, shrink_cycle=False, contract_cycles=False):
-    ''' compute a tree decomposition of the input hypergraph
-    Parameters
-    ----------
-    hg : Hypergraph
-        hypergraph to be decomposed
-    irredundant : bool
-        if True, irredundant tree decomposition will be computed.
-    Returns
-    -------
-    clique_tree : CliqueTree
-        each node contains a subhypergraph of `hg`
-    '''
-    def _contract_tree(clique_tree):
-        ''' contract a single leaf
-        Parameters
-        ----------
-        clique_tree : CliqueTree
-        Returns
-        -------
-        CliqueTree, bool
-            bool represents whether this operation succeeds or not.
-        '''
-        edge_degree_dict = clique_tree.root_hg.edge_degrees()
-        leaf_edge_list = [each_edge for each_edge in clique_tree.root_hg.edges \
-                          if (not clique_tree.root_hg.edge_attr(each_edge).get('tmp'))\
-                          and edge_degree_dict[each_edge] == 1]
-        if not leaf_edge_list:
-            return clique_tree, False
-        min_deg_edge = leaf_edge_list[0]
-        node_list = clique_tree.root_hg.nodes_in_edge(min_deg_edge)
-        min_deg_subhg = clique_tree.root_hg.get_subhg(
-            node_list, [min_deg_edge], clique_tree.ident_node_dict)
-        if clique_tree.root_hg.nodes == min_deg_subhg.nodes:
-            return clique_tree, False
-        clique_tree.update(min_deg_subhg)
-        return clique_tree, True
-    def _rip_labels_from_cycles(clique_tree, org_hg):
-        ''' rip hyperedge-labels off
-        Parameters
-        ----------
-        clique_tree : CliqueTree
-        org_hg : Hypergraph
-        Returns
-        -------
-        CliqueTree, bool
-            bool represents whether this operation succeeds or not.
-        '''
-        ident_node_dict = clique_tree.ident_node_dict #hg.get_identical_node_dict()
-        for each_edge in clique_tree.root_hg.edges:
-            if each_edge in org_hg.edges:
-                if org_hg.in_cycle(each_edge):
-                    node_list = clique_tree.root_hg.nodes_in_edge(each_edge)
-                    subhg = clique_tree.root_hg.get_subhg(
-                        node_list, [each_edge], ident_node_dict)
-                    if clique_tree.root_hg.nodes == subhg.nodes:
-                        return clique_tree, False
-                    clique_tree.update(subhg)
-                    '''
-                    in_cycle_dict = {each_node: org_hg.node_attr(each_node)['is_in_ring'] for each_node in node_list}
-                    if not all(in_cycle_dict.values()):
-                        node_not_in_cycle = [each_node for each_node in in_cycle_dict.keys() if not in_cycle_dict[each_node]][0]
-                        node_list = [node_not_in_cycle]
-                        node_list.extend(clique_tree.root_hg.adj_nodes(node_not_in_cycle))
-                        edge_list = clique_tree.root_hg.adj_edges(node_not_in_cycle)
-                        import pdb; pdb.set_trace()
-                        subhg = clique_tree.root_hg.get_subhg(
-                            node_list, edge_list, ident_node_dict)
-                        clique_tree.update(subhg)
-                    '''
-                    return clique_tree, True
-        return clique_tree, False
-    def _shrink_cycle(clique_tree):
-        ''' shrink a cycle
-        Parameters
-        ----------
-        clique_tree : CliqueTree
-        Returns
-        -------
-        CliqueTree, bool
-            bool represents whether this operation succeeds or not.
-        '''
-        def filter_subhg(subhg, hg, key_node):
-            num_nodes_cycle = 0
-            nodes_in_cycle_list = []
-            for each_node in subhg.nodes:
-                if hg.in_cycle(each_node):
-                    num_nodes_cycle += 1
-                    if each_node != key_node:
-                        nodes_in_cycle_list.append(each_node)
-                if num_nodes_cycle > 3:
-                    break
-            if num_nodes_cycle != 3:
-                return False
-            else:
-                for each_edge in hg.edges:
-                    if set(nodes_in_cycle_list).issubset(hg.nodes_in_edge(each_edge)):
-                        return False
-                return True
-        #ident_node_dict = hg.get_identical_node_dict()
-        ident_node_dict = clique_tree.ident_node_dict
-        for each_node in clique_tree.root_hg.nodes:
-            if clique_tree.root_hg.in_cycle(each_node)\
-               and filter_subhg(clique_tree.root_hg.adj_subhg(each_node, ident_node_dict),
-                                clique_tree.root_hg,
-                                each_node):
-                target_node = each_node
-                target_subhg = clique_tree.root_hg.adj_subhg(target_node, ident_node_dict)
-                if clique_tree.root_hg.nodes == target_subhg.nodes:
-                    return clique_tree, False
-                clique_tree.update(target_subhg)
-                return clique_tree, True
-        return clique_tree, False
-    def _contract_cycles(clique_tree):
-        '''
-        remove a subhypergraph that looks like a cycle on a leaf.
-        Parameters
-        ----------
-        clique_tree : CliqueTree
-        Returns
-        -------
-        CliqueTree, bool
-            bool represents whether this operation succeeds or not.
-        '''
-        def _divide_hg(hg):
-            ''' divide a hypergraph into subhypergraphs such that
-            each subhypergraph is connected to each other in a tree-like way.
-            Parameters
-            ----------
-            hg : Hypergraph
-            Returns
-            -------
-            list of Hypergraphs
-                each element corresponds to a subhypergraph of `hg`
-            '''
-            for each_node in hg.nodes:
-                if hg.is_dividable(each_node):
-                    adj_edges_dict = {each_edge: hg.in_cycle(each_edge) for each_edge in hg.adj_edges(each_node)}
-                    '''
-                    if any(adj_edges_dict.values()):
-                        import pdb; pdb.set_trace()
-                        edge_in_cycle = [each_key for each_key, each_val in adj_edges_dict.items() if each_val][0]
-                        subhg1, subhg2, subhg3 = hg.divide(each_node, edge_in_cycle)
-                        return _divide_hg(subhg1) + _divide_hg(subhg2) + _divide_hg(subhg3)
-                    else:
-                    '''
-                    subhg1, subhg2 = hg.divide(each_node)
-                    return _divide_hg(subhg1) + _divide_hg(subhg2)
-            return [hg]
-        def _is_leaf(hg, divided_subhg) -> bool:
-            ''' judge whether subhg is a leaf-like in the original hypergraph
-            Parameters
-            ----------
-            hg : Hypergraph
-            divided_subhg : Hypergraph
-                `divided_subhg` is a subhypergraph of `hg`
-            Returns
-            -------
-            bool
-            '''
-            '''
-            adj_edges_set = set([])
-            for each_node in divided_subhg.nodes:
-                adj_edges_set.update(set(hg.adj_edges(each_node)))
-            _hg = deepcopy(hg)
-            _hg.remove_subhg(divided_subhg)
-            if nx.is_connected(_hg.hg) != (len(adj_edges_set - divided_subhg.edges) == 1):
-                import pdb; pdb.set_trace()
-            return len(adj_edges_set - divided_subhg.edges) == 1
-            '''
-            _hg = deepcopy(hg)
-            _hg.remove_subhg(divided_subhg)
-            return nx.is_connected(_hg.hg)
-        subhg_list = _divide_hg(clique_tree.root_hg)
-        if len(subhg_list) == 1:
-            return clique_tree, False
-        else:
-            while len(subhg_list) > 1:
-                max_leaf_subhg = None
-                for each_subhg in subhg_list:
-                    if _is_leaf(clique_tree.root_hg, each_subhg):
-                        if max_leaf_subhg is None:
-                            max_leaf_subhg = each_subhg
-                        elif max_leaf_subhg.num_nodes < each_subhg.num_nodes:
-                            max_leaf_subhg = each_subhg
-                clique_tree.update(max_leaf_subhg)
-                subhg_list.remove(max_leaf_subhg)
-            return clique_tree, True
-    org_hg = hg.copy()
-    clique_tree = CliqueTree(org_hg)
-    clique_tree.add_node(0, subhg=org_hg)
-    success = True
-    while success:
-        '''
-        clique_tree, success = _rip_labels_from_cycles(clique_tree, hg)
-        if not success:
-            clique_tree, success = _contract_cycles(clique_tree)
-        '''
-        clique_tree, success = _contract_tree(clique_tree)
-        if not success:
-            if rip_labels:
-                clique_tree, success = _rip_labels_from_cycles(clique_tree, hg)
-            if not success:
-                if shrink_cycle:
-                    clique_tree, success = _shrink_cycle(clique_tree)
-                if not success:
-                    if contract_cycles:
-                        clique_tree, success = _contract_cycles(clique_tree)
-    clique_tree.root_hg.remove_edges_with_attr({'tmp' : True})
-    if irredundant:
-        clique_tree.to_irredundant()
-    return clique_tree
-def molecular_tree_decomposition(hg, irredundant=True):
-    """ compute a tree decomposition of the input molecular hypergraph
-    Parameters
-    ----------
-    hg : Hypergraph
-        molecular hypergraph to be decomposed
-    irredundant : bool
-        if True, irredundant tree decomposition will be computed.
-    Returns
-    -------
-    clique_tree : CliqueTree
-        each node contains a subhypergraph of `hg`
-    """
-    def _divide_hg(hg):
-        ''' divide a hypergraph into subhypergraphs such that
-        each subhypergraph is connected to each other in a tree-like way.
-        Parameters
-        ----------
-        hg : Hypergraph
-        Returns
-        -------
-        list of Hypergraphs
-            each element corresponds to a subhypergraph of `hg`
-        '''
-        is_ring = False
-        for each_node in hg.nodes:
-            if hg.node_attr(each_node)['is_in_ring']:
-                is_ring = True
-            if not hg.node_attr(each_node)['is_in_ring'] \
-               and hg.degree(each_node) == 2:
-                subhg1, subhg2 = hg.divide(each_node)
-                return _divide_hg(subhg1) + _divide_hg(subhg2)
-        if is_ring:
-            subhg_list = []
-            remove_edge_list = []
-            remove_node_list = []
-            for each_edge in hg.edges:
-                node_list = hg.nodes_in_edge(each_edge)
-                subhg = hg.get_subhg(node_list, [each_edge], hg.get_identical_node_dict())
-                subhg_list.append(subhg)
-                remove_edge_list.append(each_edge)
-                for each_node in node_list:
-                    if not hg.node_attr(each_node)['is_in_ring']:
-                        remove_node_list.append(each_node)
-            hg.remove_edges(remove_edge_list)
-            hg.remove_nodes(remove_node_list, False)
-            return subhg_list + [hg]
-        else:
-            return [hg]
-    org_hg = hg.copy()
-    clique_tree = CliqueTree(org_hg)
-    clique_tree.add_node(0, subhg=org_hg)
-    subhg_list = _divide_hg(deepcopy(clique_tree.root_hg))
-    #_subhg_list = deepcopy(subhg_list)
-    if len(subhg_list) == 1:
-        pass
-    else:
-        while len(subhg_list) > 1:
-            max_leaf_subhg = None
-            for each_subhg in subhg_list:
-                if _is_leaf(clique_tree.root_hg, each_subhg) and not _is_ring(each_subhg):
-                    if max_leaf_subhg is None:
-                        max_leaf_subhg = each_subhg
-                    elif max_leaf_subhg.num_nodes < each_subhg.num_nodes:
-                        max_leaf_subhg = each_subhg
-            if max_leaf_subhg is None:
-                for each_subhg in subhg_list:
-                    if _is_ring_label(clique_tree.root_hg, each_subhg):
-                        if max_leaf_subhg is None:
-                            max_leaf_subhg = each_subhg
-                        elif max_leaf_subhg.num_nodes < each_subhg.num_nodes:
-                            max_leaf_subhg = each_subhg
-            if max_leaf_subhg is not None:
-                clique_tree.update(max_leaf_subhg)
-                subhg_list.remove(max_leaf_subhg)
-            else:
-                for each_subhg in subhg_list:
-                    if _is_leaf(clique_tree.root_hg, each_subhg):
-                        if max_leaf_subhg is None:
-                            max_leaf_subhg = each_subhg
-                        elif max_leaf_subhg.num_nodes < each_subhg.num_nodes:
-                            max_leaf_subhg = each_subhg
-                if max_leaf_subhg is not None:
-                    clique_tree.update(max_leaf_subhg, True)
-                    subhg_list.remove(max_leaf_subhg)
-                else:
-                    break
-    if len(subhg_list) > 1:
-        '''
-        for each_idx, each_subhg in enumerate(subhg_list):
-            each_subhg.draw(f'{each_idx}', True)
-        clique_tree.root_hg.draw('root', True)
-        import pickle
-        with open('buggy_hg.pkl', 'wb') as f:
-            pickle.dump(hg, f)
-        return clique_tree, subhg_list, _subhg_list
-        '''
-        raise RuntimeError('bug in tree decomposition algorithm')
-    clique_tree.root_hg.remove_edges_with_attr({'tmp' : True})
-    '''
-    for each_tree_node in clique_tree.adj[0]:
-        subhg = clique_tree.nodes[each_tree_node]['subhg']
-        for each_edge in subhg.edges:
-            if set(subhg.nodes_in_edge(each_edge)).issubset(clique_tree.root_hg.nodes):
-                clique_tree.root_hg.add_edge(set(subhg.nodes_in_edge(each_edge)), attr_dict=dict(tmp=True))
-    '''
-    if irredundant:
-        clique_tree.to_irredundant()
-    return clique_tree #, _subhg_list
-def _is_leaf(hg, subhg) -> bool:
-    ''' judge whether subhg is a leaf-like in the original hypergraph
-    Parameters
-    ----------
-    hg : Hypergraph
-    subhg : Hypergraph
-        `subhg` is a subhypergraph of `hg`
-    Returns
-    -------
-    bool
-    '''
-    if len(subhg.edges) == 0:
-        adj_edge_set = set([])
-        subhg_edge_set = set([])
-        for each_edge in hg.edges:
-            if set(hg.nodes_in_edge(each_edge)).issubset(subhg.nodes) and hg.edge_attr(each_edge).get('tmp', False):
-                subhg_edge_set.add(each_edge)
-        for each_node in subhg.nodes:
-            adj_edge_set.update(set(hg.adj_edges(each_node)))
-        if subhg_edge_set.issubset(adj_edge_set) and len(adj_edge_set.difference(subhg_edge_set)) == 1:
-            return True
-        else:
-            return False
-    elif len(subhg.edges) == 1:
-        adj_edge_set = set([])
-        subhg_edge_set = subhg.edges
-        for each_node in subhg.nodes:
-            for each_adj_edge in hg.adj_edges(each_node):
-                adj_edge_set.add(each_adj_edge)
-        if subhg_edge_set.issubset(adj_edge_set) and len(adj_edge_set.difference(subhg_edge_set)) == 1:
-            return True
-        else:
-            return False
-    else:
-        raise ValueError('subhg should be nodes only or one-edge hypergraph.')
-def _is_ring_label(hg, subhg):
-    if len(subhg.edges) != 1:
-        return False
-    edge_name = list(subhg.edges)[0]
-    #assert edge_name in hg.edges, f'{edge_name}'
-    is_in_ring = False
-    for each_node in subhg.nodes:
-        if subhg.node_attr(each_node)['is_in_ring']:
-            is_in_ring = True
-        else:
-            adj_edge_list = list(hg.adj_edges(each_node))
-            adj_edge_list.remove(edge_name)
-            if len(adj_edge_list) == 1:
-                if not hg.edge_attr(adj_edge_list[0]).get('tmp', False):
-                    return False
-            elif len(adj_edge_list) == 0:
-                pass
-            else:
-                raise ValueError
-    if is_in_ring:
-        return True
-    else:
-        return False
-def _is_ring(hg):
-    for each_node in hg.nodes:
-        if not hg.node_attr(each_node)['is_in_ring']:
-            return False
-    return True

graph_grammar/graph_grammar/__init__.py DELETED Viewed

@@ -1,20 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Jan 1 2018"

graph_grammar/graph_grammar/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (680 Bytes)

graph_grammar/graph_grammar/__pycache__/base.cpython-310.pyc DELETED Viewed

Binary file (1.17 kB)

graph_grammar/graph_grammar/__pycache__/corpus.cpython-310.pyc DELETED Viewed

Binary file (4.71 kB)

graph_grammar/graph_grammar/__pycache__/hrg.cpython-310.pyc DELETED Viewed

Binary file (29.1 kB)

graph_grammar/graph_grammar/__pycache__/symbols.cpython-310.pyc DELETED Viewed

Binary file (5.38 kB)

graph_grammar/graph_grammar/__pycache__/utils.cpython-310.pyc DELETED Viewed

Binary file (3.63 kB)

graph_grammar/graph_grammar/base.py DELETED Viewed

@@ -1,30 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2017"
-__version__ = "0.1"
-__date__ = "Dec 11 2017"
-from abc import ABCMeta, abstractmethod
-class GraphGrammarBase(metaclass=ABCMeta):
-    @abstractmethod
-    def learn(self):
-        pass
-    @abstractmethod
-    def sample(self):
-        pass

graph_grammar/graph_grammar/corpus.py DELETED Viewed

@@ -1,152 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Jun 4 2018"
-from collections import Counter
-from functools import partial
-from .utils import _easy_node_match, _edge_match, _node_match, common_node_list, _node_match_prod_rule
-from networkx.algorithms.isomorphism import GraphMatcher
-import os
-class CliqueTreeCorpus(object):
-    ''' clique tree corpus
-    Attributes
-    ----------
-    clique_tree_list : list of CliqueTree
-    subhg_list : list of Hypergraph
-    '''
-    def __init__(self):
-        self.clique_tree_list = []
-        self.subhg_list = []
-    @property
-    def size(self):
-        return len(self.subhg_list)
-    def add_clique_tree(self, clique_tree):
-        for each_node in clique_tree.nodes:
-            subhg = clique_tree.nodes[each_node]['subhg']
-            subhg_idx = self.add_subhg(subhg)
-            clique_tree.nodes[each_node]['subhg_idx'] = subhg_idx
-        self.clique_tree_list.append(clique_tree)
-    def add_to_subhg_list(self, clique_tree, root_node):
-        parent_node_dict = {}
-        current_node = None
-        parent_node_dict[root_node] = None
-        stack = [root_node]
-        while stack:
-            current_node = stack.pop()
-            current_subhg = clique_tree.nodes[current_node]['subhg']
-            for each_child in clique_tree.adj[current_node]:
-                if each_child != parent_node_dict[current_node]:
-                    stack.append(each_child)
-                    parent_node_dict[each_child] = current_node
-            if parent_node_dict[current_node] is not None:
-                parent_subhg = clique_tree.nodes[parent_node_dict[current_node]]['subhg']
-                common, _ = common_node_list(parent_subhg, current_subhg)
-                parent_subhg.add_edge(set(common), attr_dict={'tmp': True})
-        parent_node_dict = {}
-        current_node = None
-        parent_node_dict[root_node] = None
-        stack = [root_node]
-        while stack:
-            current_node = stack.pop()
-            current_subhg = clique_tree.nodes[current_node]['subhg']
-            for each_child in clique_tree.adj[current_node]:
-                if each_child != parent_node_dict[current_node]:
-                    stack.append(each_child)
-                    parent_node_dict[each_child] = current_node
-            if parent_node_dict[current_node] is not None:
-                parent_subhg = clique_tree.nodes[parent_node_dict[current_node]]['subhg']
-                common, _ = common_node_list(parent_subhg, current_subhg)
-                for each_idx, each_node in enumerate(common):
-                    current_subhg.set_node_attr(each_node, {'ext_id': each_idx})
-            subhg_idx, is_new = self.add_subhg(current_subhg)
-            clique_tree.nodes[current_node]['subhg_idx'] = subhg_idx
-        return clique_tree
-    def add_subhg(self, subhg):
-        if len(self.subhg_list) == 0:
-            node_dict = {}
-            for each_node in subhg.nodes:
-                node_dict[each_node] = subhg.node_attr(each_node)['symbol'].__hash__()
-            node_list = []
-            for each_key, _ in sorted(node_dict.items(), key=lambda x:x[1]):
-                node_list.append(each_key)
-            for each_idx, each_node in enumerate(node_list):
-                subhg.node_attr(each_node)['order4hrg'] = each_idx
-            self.subhg_list.append(subhg)
-            return 0, True
-        else:
-            match = False
-            subhg_bond_symbol_counter \
-                = Counter([subhg.node_attr(each_node)['symbol'] \
-                           for each_node in subhg.nodes])
-            subhg_atom_symbol_counter \
-                = Counter([subhg.edge_attr(each_edge).get('symbol', None) \
-                           for each_edge in subhg.edges])
-            for each_idx, each_subhg in enumerate(self.subhg_list):
-                each_bond_symbol_counter \
-                    = Counter([each_subhg.node_attr(each_node)['symbol'] \
-                               for each_node in each_subhg.nodes])
-                each_atom_symbol_counter \
-                    = Counter([each_subhg.edge_attr(each_edge).get('symbol', None) \
-                               for each_edge in each_subhg.edges])
-                if not match \
-                   and (subhg.num_nodes == each_subhg.num_nodes
-                        and subhg.num_edges == each_subhg.num_edges
-                        and subhg_bond_symbol_counter == each_bond_symbol_counter
-                        and subhg_atom_symbol_counter == each_atom_symbol_counter):
-                    gm = GraphMatcher(each_subhg.hg,
-                                      subhg.hg,
-                                      node_match=_easy_node_match,
-                                      edge_match=_edge_match)
-                    try:
-                        isomap = next(gm.isomorphisms_iter())
-                        match = True
-                        for each_node in each_subhg.nodes:
-                            subhg.node_attr(isomap[each_node])['order4hrg'] \
-                                = each_subhg.node_attr(each_node)['order4hrg']
-                            if 'ext_id' in each_subhg.node_attr(each_node):
-                                subhg.node_attr(isomap[each_node])['ext_id'] \
-                                    = each_subhg.node_attr(each_node)['ext_id']
-                        return each_idx, False
-                    except StopIteration:
-                        match = False
-            if not match:
-                node_dict = {}
-                for each_node in subhg.nodes:
-                    node_dict[each_node] = subhg.node_attr(each_node)['symbol'].__hash__()
-                node_list = []
-                for each_key, _ in sorted(node_dict.items(), key=lambda x:x[1]):
-                    node_list.append(each_key)
-                for each_idx, each_node in enumerate(node_list):
-                    subhg.node_attr(each_node)['order4hrg'] = each_idx
-                #for each_idx, each_node in enumerate(subhg.nodes):
-                #    subhg.node_attr(each_node)['order4hrg'] = each_idx
-                self.subhg_list.append(subhg)
-                return len(self.subhg_list) - 1, True

graph_grammar/graph_grammar/hrg.py DELETED Viewed

@@ -1,1065 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2017"
-__version__ = "0.1"
-__date__ = "Dec 11 2017"
-from .corpus import CliqueTreeCorpus
-from .base import GraphGrammarBase
-from .symbols import TSymbol, NTSymbol, BondSymbol
-from .utils import _node_match, _node_match_prod_rule, _edge_match, masked_softmax, common_node_list
-from ..hypergraph import Hypergraph
-from collections import Counter
-from copy import deepcopy
-from ..algo.tree_decomposition import (
-    tree_decomposition,
-    tree_decomposition_with_hrg,
-    tree_decomposition_from_leaf,
-    topological_tree_decomposition,
-    molecular_tree_decomposition)
-from functools import partial
-from networkx.algorithms.isomorphism import GraphMatcher
-from typing import List, Dict, Tuple
-import networkx as nx
-import numpy as np
-import torch
-import os
-import random
-DEBUG = False
-class ProductionRule(object):
-    """ A class of a production rule
-    Attributes
-    ----------
-    lhs : Hypergraph or None
-        the left hand side of the production rule.
-        if None, the rule is a starting rule.
-    rhs : Hypergraph
-        the right hand side of the production rule.
-    """
-    def __init__(self, lhs, rhs):
-        self.lhs = lhs
-        self.rhs = rhs
-    @property
-    def is_start_rule(self) -> bool:
-        return self.lhs.num_nodes == 0
-    @property
-    def ext_node(self) -> Dict[int, str]:
-        """ return a dict of external nodes
-        """
-        if self.is_start_rule:
-            return {}
-        else:
-            ext_node_dict = {}
-            for each_node in self.lhs.nodes:
-                ext_node_dict[self.lhs.node_attr(each_node)["ext_id"]] = each_node
-            return ext_node_dict
-    @property
-    def lhs_nt_symbol(self) -> NTSymbol:
-        if self.is_start_rule:
-            return NTSymbol(degree=0, is_aromatic=False, bond_symbol_list=[])
-        else:
-            return self.lhs.edge_attr(list(self.lhs.edges)[0])['symbol']
-    def rhs_adj_mat(self, node_edge_list):
-        ''' return the adjacency matrix of rhs of the production rule
-        '''
-        return nx.adjacency_matrix(self.rhs.hg, node_edge_list)
-    def draw(self, file_path=None):
-        return self.rhs.draw(file_path)
-    def is_same(self, prod_rule, ignore_order=False):
-        """ judge whether this production rule is
-        the same as the input one, `prod_rule`
-        Parameters
-        ----------
-        prod_rule : ProductionRule
-            production rule to be compared
-        Returns
-        -------
-        is_same : bool
-        isomap : dict
-            isomorphism of nodes and hyperedges.
-            ex) {'bond_42': 'bond_37', 'bond_2': 'bond_1',
-                 'e36': 'e11', 'e16': 'e12', 'e25': 'e18',
-                 'bond_40': 'bond_38', 'e26': 'e21', 'bond_41': 'bond_39'}.
-            key comes from `prod_rule`, value comes from `self`.
-        """
-        if self.is_start_rule:
-            if not prod_rule.is_start_rule:
-                return False, {}
-        else:
-            if prod_rule.is_start_rule:
-                return False, {}
-            else:
-                if prod_rule.lhs.num_nodes != self.lhs.num_nodes:
-                    return False, {}
-        if prod_rule.rhs.num_nodes != self.rhs.num_nodes:
-            return False, {}
-        if prod_rule.rhs.num_edges != self.rhs.num_edges:
-            return False, {}
-        subhg_bond_symbol_counter \
-            = Counter([prod_rule.rhs.node_attr(each_node)['symbol'] \
-                       for each_node in prod_rule.rhs.nodes])
-        each_bond_symbol_counter \
-            = Counter([self.rhs.node_attr(each_node)['symbol'] \
-                       for each_node in self.rhs.nodes])
-        if subhg_bond_symbol_counter != each_bond_symbol_counter:
-            return False, {}
-        subhg_atom_symbol_counter \
-            = Counter([prod_rule.rhs.edge_attr(each_edge)['symbol'] \
-                       for each_edge in prod_rule.rhs.edges])
-        each_atom_symbol_counter \
-            = Counter([self.rhs.edge_attr(each_edge)['symbol'] \
-                       for each_edge in self.rhs.edges])
-        if subhg_atom_symbol_counter != each_atom_symbol_counter:
-            return False, {}
-        gm = GraphMatcher(prod_rule.rhs.hg,
-                          self.rhs.hg,
-                          partial(_node_match_prod_rule,
-                                  ignore_order=ignore_order),
-                          partial(_edge_match,
-                                  ignore_order=ignore_order))
-        try:
-            return True, next(gm.isomorphisms_iter())
-        except StopIteration:
-            return False, {}
-    def applied_to(self,
-                   hg: Hypergraph,
-                   edge: str) -> Tuple[Hypergraph, List[str]]:
-        """ augment `hg` by replacing `edge` with `self.rhs`.
-        Parameters
-        ----------
-        hg : Hypergraph
-        edge : str
-            `edge` must belong to `hg`
-        Returns
-        -------
-        hg : Hypergraph
-            resultant hypergraph
-        nt_edge_list : list
-            list of non-terminal edges
-        """
-        nt_edge_dict = {}
-        if self.is_start_rule:
-            if (edge is not None) or (hg is not None):
-                ValueError("edge and hg must be None for this prod rule.")
-            hg = Hypergraph()
-            node_map_rhs = {} # node id in rhs -> node id in hg, where rhs is augmented.
-            for num_idx, each_node in enumerate(self.rhs.nodes):
-                hg.add_node(f"bond_{num_idx}",
-                            #attr_dict=deepcopy(self.rhs.node_attr(each_node)))
-                            attr_dict=self.rhs.node_attr(each_node))
-                node_map_rhs[each_node] = f"bond_{num_idx}"
-            for each_edge in self.rhs.edges:
-                node_list = []
-                for each_node in self.rhs.nodes_in_edge(each_edge):
-                    node_list.append(node_map_rhs[each_node])
-                if isinstance(self.rhs.nodes_in_edge(each_edge), set):
-                    node_list = set(node_list)
-                edge_id = hg.add_edge(
-                    node_list,
-                    #attr_dict=deepcopy(self.rhs.edge_attr(each_edge)))
-                    attr_dict=self.rhs.edge_attr(each_edge))
-                if "nt_idx" in hg.edge_attr(edge_id):
-                    nt_edge_dict[hg.edge_attr(edge_id)["nt_idx"]] = edge_id
-            nt_edge_list = [nt_edge_dict[key] for key in range(len(nt_edge_dict))]
-            return hg, nt_edge_list
-        else:
-            if edge not in hg.edges:
-                raise ValueError("the input hyperedge does not exist.")
-            if hg.edge_attr(edge)["terminal"]:
-                raise ValueError("the input hyperedge is terminal.")
-            if hg.edge_attr(edge)['symbol'] != self.lhs_nt_symbol:
-                print(hg.edge_attr(edge)['symbol'], self.lhs_nt_symbol)
-                raise ValueError("the input hyperedge and lhs have inconsistent number of nodes.")
-            if DEBUG:
-                for node_idx, each_node in enumerate(hg.nodes_in_edge(edge)):
-                    other_node = self.lhs.nodes_in_edge(list(self.lhs.edges)[0])[node_idx]
-                    attr = deepcopy(self.lhs.node_attr(other_node))
-                    attr.pop('ext_id')
-                    if hg.node_attr(each_node) != attr:
-                        raise ValueError('node attributes are inconsistent.')
-            # order of nodes that belong to the non-terminal edge in hg
-            nt_order_dict = {}  # hg_node -> order ("bond_17" : 1)
-            nt_order_dict_inv = {} # order -> hg_node
-            for each_idx, each_node in enumerate(hg.nodes_in_edge(edge)):
-                nt_order_dict[each_node] = each_idx
-                nt_order_dict_inv[each_idx] = each_node
-            # construct a node_map_rhs: rhs -> new hg
-            node_map_rhs = {} # node id in rhs -> node id in hg, where rhs is augmented.
-            node_idx = hg.num_nodes
-            for each_node in self.rhs.nodes:
-                if "ext_id" in self.rhs.node_attr(each_node):
-                    node_map_rhs[each_node] \
-                        = nt_order_dict_inv[
-                            self.rhs.node_attr(each_node)["ext_id"]]
-                else:
-                    node_map_rhs[each_node] = f"bond_{node_idx}"
-                    node_idx += 1
-            # delete non-terminal
-            hg.remove_edge(edge)
-            # add nodes to hg
-            for each_node in self.rhs.nodes:
-                hg.add_node(node_map_rhs[each_node],
-                            attr_dict=self.rhs.node_attr(each_node))
-            # add hyperedges to hg
-            for each_edge in self.rhs.edges:
-                node_list_hg = []
-                for each_node in self.rhs.nodes_in_edge(each_edge):
-                    node_list_hg.append(node_map_rhs[each_node])
-                edge_id = hg.add_edge(
-                    node_list_hg,
-                    attr_dict=self.rhs.edge_attr(each_edge))#deepcopy(self.rhs.edge_attr(each_edge)))
-                if "nt_idx" in hg.edge_attr(edge_id):
-                    nt_edge_dict[hg.edge_attr(edge_id)["nt_idx"]] = edge_id
-            nt_edge_list = [nt_edge_dict[key] for key in range(len(nt_edge_dict))]
-            return hg, nt_edge_list
-    def revert(self, hg: Hypergraph, return_subhg=False):
-        ''' revert applying this production rule.
-        i.e., if there exists a subhypergraph that matches the r.h.s. of this production rule,
-        this method replaces the subhypergraph with a non-terminal hyperedge.
-        Parameters
-        ----------
-        hg : Hypergraph
-            hypergraph to be reverted
-        return_subhg : bool
-            if True, the removed subhypergraph will be returned.
-        Returns
-        -------
-        hg : Hypergraph
-            the resultant hypergraph. if it cannot be reverted, the original one is returned without any replacement.
-        success : bool
-            this indicates whether reverting is successed or not.
-        '''
-        gm = GraphMatcher(hg.hg, self.rhs.hg, node_match=_node_match_prod_rule,
-                          edge_match=_edge_match)
-        try:
-            # in case when the matched subhg is connected to the other part via external nodes and more.
-            not_iso = True
-            while not_iso:
-                isomap = next(gm.subgraph_isomorphisms_iter())
-                adj_node_set = set([]) # reachable nodes from the internal nodes
-                subhg_node_set = set(isomap.keys()) # nodes in subhg
-                for each_node in subhg_node_set:
-                    adj_node_set.add(each_node)
-                    if isomap[each_node] not in self.ext_node.values():
-                        adj_node_set.update(hg.hg.adj[each_node])
-                if adj_node_set == subhg_node_set:
-                    not_iso = False
-                else:
-                    if return_subhg:
-                        return hg, False, Hypergraph()
-                    else:
-                        return hg, False
-            inv_isomap = {v: k for k, v in isomap.items()}
-            '''
-            isomap = {'e35': 'e8', 'bond_13': 'bond_18', 'bond_14': 'bond_19',
-                      'bond_15': 'bond_17', 'e29': 'e23', 'bond_12': 'bond_20'}
-            where keys come from `hg` and values come from `self.rhs`
-            '''
-        except StopIteration:
-            if return_subhg:
-                return hg, False, Hypergraph()
-            else:
-                return hg, False
-        if return_subhg:
-            subhg = Hypergraph()
-            for each_node in hg.nodes:
-                if each_node in isomap:
-                    subhg.add_node(each_node, attr_dict=hg.node_attr(each_node))
-            for each_edge in hg.edges:
-                if each_edge in isomap:
-                    subhg.add_edge(hg.nodes_in_edge(each_edge),
-                                   attr_dict=hg.edge_attr(each_edge),
-                                   edge_name=each_edge)
-            subhg.edge_idx = hg.edge_idx
-        # remove subhg except for the externael nodes
-        for each_key, each_val in isomap.items():
-            if each_key.startswith('e'):
-                hg.remove_edge(each_key)
-        for each_key, each_val in isomap.items():
-            if each_key.startswith('bond_'):
-                if each_val not in self.ext_node.values():
-                    hg.remove_node(each_key)
-        # add non-terminal hyperedge
-        nt_node_list = []
-        for each_ext_id in self.ext_node.keys():
-            nt_node_list.append(inv_isomap[self.ext_node[each_ext_id]])
-        hg.add_edge(nt_node_list,
-                    attr_dict=dict(
-                        terminal=False,
-                        symbol=self.lhs_nt_symbol))
-        if return_subhg:
-            return hg, True, subhg
-        else:
-            return hg, True
-class ProductionRuleCorpus(object):
-    '''
-    A corpus of production rules.
-    This class maintains
-        (i) list of unique production rules,
-        (ii) list of unique edge symbols (both terminal and non-terminal), and
-        (iii) list of unique node symbols.
-    Attributes
-    ----------
-    prod_rule_list : list
-        list of unique production rules
-    edge_symbol_list : list
-        list of unique symbols (including both terminal and non-terminal)
-    node_symbol_list : list
-        list of node symbols
-    nt_symbol_list : list
-        list of unique lhs symbols
-    ext_id_list : list
-        list of ext_ids
-    lhs_in_prod_rule : array
-        a matrix of lhs vs prod_rule (= lhs_in_prod_rule)
-    '''
-    def __init__(self):
-        self.prod_rule_list = []
-        self.edge_symbol_list = []
-        self.edge_symbol_dict = {}
-        self.node_symbol_list = []
-        self.node_symbol_dict = {}
-        self.nt_symbol_list = []
-        self.ext_id_list = []
-        self._lhs_in_prod_rule = None
-        self.lhs_in_prod_rule_row_list = []
-        self.lhs_in_prod_rule_col_list = []
-    @property
-    def lhs_in_prod_rule(self):
-        if self._lhs_in_prod_rule is None:
-            self._lhs_in_prod_rule = torch.sparse.FloatTensor(
-                torch.LongTensor(list(zip(self.lhs_in_prod_rule_row_list, self.lhs_in_prod_rule_col_list))).t(),
-                torch.FloatTensor([1.0]*len(self.lhs_in_prod_rule_col_list)),
-                torch.Size([len(self.nt_symbol_list), len(self.prod_rule_list)])
-            ).to_dense()
-        return self._lhs_in_prod_rule
-    @property
-    def num_prod_rule(self):
-        ''' return the number of production rules
-        Returns
-        -------
-        int : the number of unique production rules
-        '''
-        return len(self.prod_rule_list)
-    @property
-    def start_rule_list(self):
-        ''' return a list of start rules
-        Returns
-        -------
-        list : list of start rules
-        '''
-        start_rule_list = []
-        for each_prod_rule in self.prod_rule_list:
-            if each_prod_rule.is_start_rule:
-                start_rule_list.append(each_prod_rule)
-        return start_rule_list
-    @property
-    def num_edge_symbol(self):
-        return len(self.edge_symbol_list)
-    @property
-    def num_node_symbol(self):
-        return len(self.node_symbol_list)
-    @property
-    def num_ext_id(self):
-        return len(self.ext_id_list)
-    def construct_feature_vectors(self):
-        ''' this method constructs feature vectors for the production rules collected so far.
-        currently, NTSymbol and TSymbol are treated in the same manner.
-        '''
-        feature_id_dict = {}
-        feature_id_dict['TSymbol'] = 0
-        feature_id_dict['NTSymbol'] = 1
-        feature_id_dict['BondSymbol'] = 2
-        for each_edge_symbol in self.edge_symbol_list:
-            for each_attr in each_edge_symbol.__dict__.keys():
-                each_val = each_edge_symbol.__dict__[each_attr]
-                if isinstance(each_val, list):
-                    each_val = tuple(each_val)
-                if (each_attr, each_val) not in feature_id_dict:
-                    feature_id_dict[(each_attr, each_val)] = len(feature_id_dict)
-        for each_node_symbol in self.node_symbol_list:
-            for each_attr in each_node_symbol.__dict__.keys():
-                each_val = each_node_symbol.__dict__[each_attr]
-                if isinstance(each_val, list):
-                    each_val = tuple(each_val)
-                if (each_attr, each_val) not in feature_id_dict:
-                    feature_id_dict[(each_attr, each_val)] = len(feature_id_dict)
-        for each_ext_id in self.ext_id_list:
-            feature_id_dict[('ext_id', each_ext_id)] = len(feature_id_dict)
-        dim = len(feature_id_dict)
-        feature_dict = {}
-        for each_edge_symbol in self.edge_symbol_list:
-            idx_list = []
-            idx_list.append(feature_id_dict[each_edge_symbol.__class__.__name__])
-            for each_attr in each_edge_symbol.__dict__.keys():
-                each_val = each_edge_symbol.__dict__[each_attr]
-                if isinstance(each_val, list):
-                    each_val = tuple(each_val)
-                idx_list.append(feature_id_dict[(each_attr, each_val)])
-            feature = torch.sparse.LongTensor(
-                torch.LongTensor([idx_list]),
-                torch.ones(len(idx_list)),
-                torch.Size([len(feature_id_dict)])
-            )
-            feature_dict[each_edge_symbol] = feature
-        for each_node_symbol in self.node_symbol_list:
-            idx_list = []
-            idx_list.append(feature_id_dict[each_node_symbol.__class__.__name__])
-            for each_attr in each_node_symbol.__dict__.keys():
-                each_val = each_node_symbol.__dict__[each_attr]
-                if isinstance(each_val, list):
-                    each_val = tuple(each_val)
-                idx_list.append(feature_id_dict[(each_attr, each_val)])
-            feature = torch.sparse.LongTensor(
-                torch.LongTensor([idx_list]),
-                torch.ones(len(idx_list)),
-                torch.Size([len(feature_id_dict)])
-            )
-            feature_dict[each_node_symbol] = feature
-        for each_ext_id in self.ext_id_list:
-            idx_list = [feature_id_dict[('ext_id', each_ext_id)]]
-            feature_dict[('ext_id', each_ext_id)] \
-                = torch.sparse.LongTensor(
-                    torch.LongTensor([idx_list]),
-                    torch.ones(len(idx_list)),
-                    torch.Size([len(feature_id_dict)])
-                )
-        return feature_dict, dim
-    def edge_symbol_idx(self, symbol):
-        return self.edge_symbol_dict[symbol]
-    def node_symbol_idx(self, symbol):
-        return self.node_symbol_dict[symbol]
-    def append(self, prod_rule: ProductionRule) -> Tuple[int, ProductionRule]:
-        """ return whether the input production rule is new or not, and its production rule id.
-        Production rules are regarded as the same if
-            i) there exists a one-to-one mapping of nodes and edges, and
-            ii) all the attributes associated with nodes and hyperedges are the same.
-        Parameters
-        ----------
-        prod_rule : ProductionRule
-        Returns
-        -------
-        prod_rule_id : int
-            production rule index. if new, a new index will be assigned.
-        prod_rule : ProductionRule
-        """
-        num_lhs = len(self.nt_symbol_list)
-        for each_idx, each_prod_rule in enumerate(self.prod_rule_list):
-            is_same, isomap = prod_rule.is_same(each_prod_rule)
-            if is_same:
-                # we do not care about edge and node names, but care about the order of non-terminal edges.
-                for key, val in isomap.items(): # key : edges & nodes in each_prod_rule.rhs , val : those in prod_rule.rhs
-                    if key.startswith("bond_"):
-                        continue
-                    # rewrite `nt_idx` in `prod_rule` for further processing
-                    if "nt_idx" in prod_rule.rhs.edge_attr(val).keys():
-                        if "nt_idx" not in each_prod_rule.rhs.edge_attr(key).keys():
-                            raise ValueError
-                        prod_rule.rhs.set_edge_attr(
-                            val,
-                            {'nt_idx': each_prod_rule.rhs.edge_attr(key)["nt_idx"]})
-                return each_idx, prod_rule
-        self.prod_rule_list.append(prod_rule)
-        self._update_edge_symbol_list(prod_rule)
-        self._update_node_symbol_list(prod_rule)
-        self._update_ext_id_list(prod_rule)
-        lhs_idx = self.nt_symbol_list.index(prod_rule.lhs_nt_symbol)
-        self.lhs_in_prod_rule_row_list.append(lhs_idx)
-        self.lhs_in_prod_rule_col_list.append(len(self.prod_rule_list)-1)
-        self._lhs_in_prod_rule = None
-        return len(self.prod_rule_list)-1, prod_rule
-    def get_prod_rule(self, prod_rule_idx: int) -> ProductionRule:
-        return self.prod_rule_list[prod_rule_idx]
-    def sample(self, unmasked_logit_array, nt_symbol, deterministic=False):
-        ''' sample a production rule whose lhs is `nt_symbol`, followihng `unmasked_logit_array`.
-        Parameters
-        ----------
-        unmasked_logit_array : array-like, length `num_prod_rule`
-        nt_symbol : NTSymbol
-        '''
-        if not isinstance(unmasked_logit_array, np.ndarray):
-            unmasked_logit_array = unmasked_logit_array.numpy().astype(np.float64)
-        if deterministic:
-            prob = masked_softmax(unmasked_logit_array,
-                                  self.lhs_in_prod_rule[self.nt_symbol_list.index(nt_symbol)].numpy().astype(np.float64))
-            return self.prod_rule_list[np.argmax(prob)]
-        else:
-            return np.random.choice(
-                self.prod_rule_list, 1,
-                p=masked_softmax(unmasked_logit_array,
-                                 self.lhs_in_prod_rule[self.nt_symbol_list.index(nt_symbol)].numpy().astype(np.float64)))[0]
-    def masked_logprob(self, unmasked_logit_array, nt_symbol):
-        if not isinstance(unmasked_logit_array, np.ndarray):
-            unmasked_logit_array = unmasked_logit_array.numpy().astype(np.float64)
-        prob = masked_softmax(unmasked_logit_array,
-                              self.lhs_in_prod_rule[self.nt_symbol_list.index(nt_symbol)].numpy().astype(np.float64))
-        return np.log(prob)
-    def _update_edge_symbol_list(self, prod_rule: ProductionRule):
-        ''' update edge symbol list
-        Parameters
-        ----------
-        prod_rule : ProductionRule
-        '''
-        if prod_rule.lhs_nt_symbol not in self.nt_symbol_list:
-            self.nt_symbol_list.append(prod_rule.lhs_nt_symbol)
-        for each_edge in prod_rule.rhs.edges:
-            if prod_rule.rhs.edge_attr(each_edge)['symbol'] not in self.edge_symbol_dict:
-                edge_symbol_idx = len(self.edge_symbol_list)
-                self.edge_symbol_list.append(prod_rule.rhs.edge_attr(each_edge)['symbol'])
-                self.edge_symbol_dict[prod_rule.rhs.edge_attr(each_edge)['symbol']] = edge_symbol_idx
-            else:
-                edge_symbol_idx = self.edge_symbol_dict[prod_rule.rhs.edge_attr(each_edge)['symbol']]
-            prod_rule.rhs.edge_attr(each_edge)['symbol_idx'] = edge_symbol_idx
-        pass
-    def _update_node_symbol_list(self, prod_rule: ProductionRule):
-        ''' update node symbol list
-        Parameters
-        ----------
-        prod_rule : ProductionRule
-        '''
-        for each_node in prod_rule.rhs.nodes:
-            if prod_rule.rhs.node_attr(each_node)['symbol'] not in self.node_symbol_dict:
-                node_symbol_idx = len(self.node_symbol_list)
-                self.node_symbol_list.append(prod_rule.rhs.node_attr(each_node)['symbol'])
-                self.node_symbol_dict[prod_rule.rhs.node_attr(each_node)['symbol']] = node_symbol_idx
-            else:
-                node_symbol_idx = self.node_symbol_dict[prod_rule.rhs.node_attr(each_node)['symbol']]
-            prod_rule.rhs.node_attr(each_node)['symbol_idx'] = node_symbol_idx
-    def _update_ext_id_list(self, prod_rule: ProductionRule):
-        for each_node in prod_rule.rhs.nodes:
-            if 'ext_id' in prod_rule.rhs.node_attr(each_node):
-                if prod_rule.rhs.node_attr(each_node)['ext_id'] not in self.ext_id_list:
-                    self.ext_id_list.append(prod_rule.rhs.node_attr(each_node)['ext_id'])
-class HyperedgeReplacementGrammar(GraphGrammarBase):
-    """
-    Learn a hyperedge replacement grammar from a set of hypergraphs.
-    Attributes
-    ----------
-    prod_rule_list : list of ProductionRule
-        production rules learned from the input hypergraphs
-    """
-    def __init__(self,
-                 tree_decomposition=molecular_tree_decomposition,
-                 ignore_order=False, **kwargs):
-        from functools import partial
-        self.prod_rule_corpus = ProductionRuleCorpus()
-        self.clique_tree_corpus = CliqueTreeCorpus()
-        self.ignore_order = ignore_order
-        self.tree_decomposition = partial(tree_decomposition, **kwargs)
-    @property
-    def num_prod_rule(self):
-        ''' return the number of production rules
-        Returns
-        -------
-        int : the number of unique production rules
-        '''
-        return self.prod_rule_corpus.num_prod_rule
-    @property
-    def start_rule_list(self):
-        ''' return a list of start rules
-        Returns
-        -------
-        list : list of start rules
-        '''
-        return self.prod_rule_corpus.start_rule_list
-    @property
-    def prod_rule_list(self):
-        return self.prod_rule_corpus.prod_rule_list
-    def learn(self, hg_list, logger=print, max_mol=np.inf, print_freq=500):
-        """ learn from a list of hypergraphs
-        Parameters
-        ----------
-        hg_list : list of Hypergraph
-        Returns
-        -------
-        prod_rule_seq_list : list of integers
-            each element corresponds to a sequence of production rules to generate each hypergraph.
-        """
-        prod_rule_seq_list = []
-        idx = 0
-        for each_idx, each_hg in enumerate(hg_list):
-            clique_tree = self.tree_decomposition(each_hg)
-            # get a pair of myself and children
-            root_node = _find_root(clique_tree)
-            clique_tree = self.clique_tree_corpus.add_to_subhg_list(clique_tree, root_node)
-            prod_rule_seq = []
-            stack = []
-            children = sorted(list(clique_tree[root_node].keys()))
-            # extract a temporary production rule
-            prod_rule = extract_prod_rule(
-                None,
-                clique_tree.nodes[root_node]["subhg"],
-                [clique_tree.nodes[each_child]["subhg"]
-                 for each_child in children],
-                clique_tree.nodes[root_node].get('subhg_idx', None))
-            # update the production rule list
-            prod_rule_id, prod_rule = self.update_prod_rule_list(prod_rule)
-            children = reorder_children(root_node,
-                                        children,
-                                        prod_rule,
-                                        clique_tree)
-            stack.extend([(root_node, each_child) for each_child in children[::-1]])
-            prod_rule_seq.append(prod_rule_id)
-            while len(stack) != 0:
-                # get a triple of parent, myself, and children
-                parent, myself = stack.pop()
-                children = sorted(list(dict(clique_tree[myself]).keys()))
-                children.remove(parent)
-                # extract a temp prod rule
-                prod_rule = extract_prod_rule(
-                    clique_tree.nodes[parent]["subhg"],
-                    clique_tree.nodes[myself]["subhg"],
-                    [clique_tree.nodes[each_child]["subhg"]
-                     for each_child in children],
-                    clique_tree.nodes[myself].get('subhg_idx', None))
-                # update the prod rule list
-                prod_rule_id, prod_rule = self.update_prod_rule_list(prod_rule)
-                children = reorder_children(myself,
-                                            children,
-                                            prod_rule,
-                                            clique_tree)
-                stack.extend([(myself, each_child)
-                              for each_child in children[::-1]])
-                prod_rule_seq.append(prod_rule_id)
-            prod_rule_seq_list.append(prod_rule_seq)
-            if (each_idx+1) % print_freq == 0:
-                msg = f'#(molecules processed)={each_idx+1}\t'\
-                        f'#(production rules)={self.prod_rule_corpus.num_prod_rule}\t#(subhg in corpus)={self.clique_tree_corpus.size}'
-                logger(msg)
-            if each_idx > max_mol:
-                break
-        print(f'corpus_size = {self.clique_tree_corpus.size}')
-        return prod_rule_seq_list
-    def sample(self, z, deterministic=False):
-        """ sample a new hypergraph from HRG.
-        Parameters
-        ----------
-        z : array-like, shape (len, num_prod_rule)
-            logit
-        deterministic : bool
-            if True, deterministic sampling
-        Returns
-        -------
-        Hypergraph
-        """
-        seq_idx = 0
-        stack = []
-        z = z[:, :-1]
-        init_prod_rule = self.prod_rule_corpus.sample(z[0], NTSymbol(degree=0,
-                                                                     is_aromatic=False,
-                                                                     bond_symbol_list=[]),
-                                                      deterministic=deterministic)
-        hg, nt_edge_list = init_prod_rule.applied_to(None, None)
-        stack = deepcopy(nt_edge_list[::-1])
-        while len(stack) != 0 and seq_idx < z.shape[0]-1:
-            seq_idx += 1
-            nt_edge = stack.pop()
-            nt_symbol = hg.edge_attr(nt_edge)['symbol']
-            prod_rule = self.prod_rule_corpus.sample(z[seq_idx], nt_symbol, deterministic=deterministic)
-            hg, nt_edge_list = prod_rule.applied_to(hg, nt_edge)
-            stack.extend(nt_edge_list[::-1])
-        if len(stack) != 0:
-            raise RuntimeError(f'{len(stack)} non-terminals are left.')
-        return hg
-    def construct(self, prod_rule_seq):
-        """ construct a hypergraph following `prod_rule_seq`
-        Parameters
-        ----------
-        prod_rule_seq : list of integers
-            a sequence of production rules.
-        Returns
-        -------
-        UndirectedHypergraph
-        """
-        seq_idx = 0
-        init_prod_rule = self.prod_rule_corpus.get_prod_rule(prod_rule_seq[seq_idx])
-        hg, nt_edge_list = init_prod_rule.applied_to(None, None)
-        stack = deepcopy(nt_edge_list[::-1])
-        while len(stack) != 0:
-            seq_idx += 1
-            nt_edge = stack.pop()
-            hg, nt_edge_list = self.prod_rule_corpus.get_prod_rule(prod_rule_seq[seq_idx]).applied_to(hg, nt_edge)
-            stack.extend(nt_edge_list[::-1])
-        return hg
-    def update_prod_rule_list(self, prod_rule):
-        """ return whether the input production rule is new or not, and its production rule id.
-        Production rules are regarded as the same if
-            i) there exists a one-to-one mapping of nodes and edges, and
-            ii) all the attributes associated with nodes and hyperedges are the same.
-        Parameters
-        ----------
-        prod_rule : ProductionRule
-        Returns
-        -------
-        is_new : bool
-            if True, this production rule is new
-        prod_rule_id : int
-            production rule index. if new, a new index will be assigned.
-        """
-        return self.prod_rule_corpus.append(prod_rule)
-class IncrementalHyperedgeReplacementGrammar(HyperedgeReplacementGrammar):
-    '''
-    This class learns HRG incrementally leveraging the previously obtained production rules.
-    '''
-    def __init__(self, tree_decomposition=tree_decomposition_with_hrg, ignore_order=False):
-        self.prod_rule_list = []
-        self.tree_decomposition = tree_decomposition
-        self.ignore_order = ignore_order
-    def learn(self, hg_list):
-        """ learn from a list of hypergraphs
-        Parameters
-        ----------
-        hg_list : list of UndirectedHypergraph
-        Returns
-        -------
-        prod_rule_seq_list : list of integers
-            each element corresponds to a sequence of production rules to generate each hypergraph.
-        """
-        prod_rule_seq_list = []
-        for each_hg in hg_list:
-            clique_tree, root_node = tree_decomposition_with_hrg(each_hg, self, return_root=True)
-            prod_rule_seq = []
-            stack = []
-            # get a pair of myself and children
-            children = sorted(list(clique_tree[root_node].keys()))
-            # extract a temporary production rule
-            prod_rule = extract_prod_rule(None, clique_tree.nodes[root_node]["subhg"],
-                                          [clique_tree.nodes[each_child]["subhg"] for each_child in children])
-            # update the production rule list
-            prod_rule_id, prod_rule = self.update_prod_rule_list(prod_rule)
-            children = reorder_children(root_node, children, prod_rule, clique_tree)
-            stack.extend([(root_node, each_child) for each_child in children[::-1]])
-            prod_rule_seq.append(prod_rule_id)
-            while len(stack) != 0:
-                # get a triple of parent, myself, and children
-                parent, myself = stack.pop()
-                children = sorted(list(dict(clique_tree[myself]).keys()))
-                children.remove(parent)
-                # extract a temp prod rule
-                prod_rule = extract_prod_rule(
-                    clique_tree.nodes[parent]["subhg"], clique_tree.nodes[myself]["subhg"],
-                    [clique_tree.nodes[each_child]["subhg"] for each_child in children])
-                # update the prod rule list
-                prod_rule_id, prod_rule = self.update_prod_rule_list(prod_rule)
-                children = reorder_children(myself, children, prod_rule, clique_tree)
-                stack.extend([(myself, each_child) for each_child in children[::-1]])
-                prod_rule_seq.append(prod_rule_id)
-            prod_rule_seq_list.append(prod_rule_seq)
-        self._compute_stats()
-        return prod_rule_seq_list
-def reorder_children(myself, children, prod_rule, clique_tree):
-    """ reorder children so that they match the order in `prod_rule`.
-    Parameters
-    ----------
-    myself : int
-    children : list of int
-    prod_rule : ProductionRule
-    clique_tree : nx.Graph
-    Returns
-    -------
-    new_children : list of str
-        reordered children
-    """
-    perm = {} # key : `nt_idx`, val : child
-    for each_edge in prod_rule.rhs.edges:
-        if "nt_idx" in prod_rule.rhs.edge_attr(each_edge).keys():
-            for each_child in children:
-                common_node_set = set(
-                    common_node_list(clique_tree.nodes[myself]["subhg"],
-                                     clique_tree.nodes[each_child]["subhg"])[0])
-                if set(prod_rule.rhs.nodes_in_edge(each_edge)) == common_node_set:
-                    assert prod_rule.rhs.edge_attr(each_edge)["nt_idx"] not in perm
-                    perm[prod_rule.rhs.edge_attr(each_edge)["nt_idx"]] = each_child
-    new_children = []
-    assert len(perm) == len(children)
-    for i in range(len(perm)):
-        new_children.append(perm[i])
-    return new_children
-def extract_prod_rule(parent_hg, myself_hg, children_hg_list, subhg_idx=None):
-    """ extract a production rule from a triple of `parent_hg`, `myself_hg`, and `children_hg_list`.
-    Parameters
-    ----------
-    parent_hg : Hypergraph
-    myself_hg : Hypergraph
-    children_hg_list : list of Hypergraph
-    Returns
-    -------
-    ProductionRule, consisting of
-        lhs : Hypergraph or None
-        rhs : Hypergraph
-    """
-    def _add_ext_node(hg, ext_nodes):
-        """ mark nodes to be external (ordered ids are assigned)
-        Parameters
-        ----------
-        hg : UndirectedHypergraph
-        ext_nodes : list of str
-            list of external nodes
-        Returns
-        -------
-        hg : Hypergraph
-            nodes in `ext_nodes` are marked to be external
-        """
-        ext_id = 0
-        ext_id_exists = []
-        for each_node in ext_nodes:
-            ext_id_exists.append('ext_id' in hg.node_attr(each_node))
-        if ext_id_exists and any(ext_id_exists) != all(ext_id_exists):
-            raise ValueError
-        if not all(ext_id_exists):
-            for each_node in ext_nodes:
-                hg.node_attr(each_node)['ext_id'] = ext_id
-                ext_id += 1
-        return hg
-    def _check_aromatic(hg, node_list):
-        is_aromatic = False
-        node_aromatic_list = []
-        for each_node in node_list:
-            if hg.node_attr(each_node)['symbol'].is_aromatic:
-                is_aromatic = True
-                node_aromatic_list.append(True)
-            else:
-                node_aromatic_list.append(False)
-        return is_aromatic, node_aromatic_list
-    def _check_ring(hg):
-        for each_edge in hg.edges:
-            if not ('tmp' in hg.edge_attr(each_edge) or (not hg.edge_attr(each_edge)['terminal'])):
-                return False
-        return True
-    if parent_hg is None:
-        lhs = Hypergraph()
-        node_list = []
-    else:
-        lhs = Hypergraph()
-        node_list, edge_exists = common_node_list(parent_hg, myself_hg)
-        for each_node in node_list:
-            lhs.add_node(each_node,
-                         deepcopy(myself_hg.node_attr(each_node)))
-        is_aromatic, _ = _check_aromatic(parent_hg, node_list)
-        for_ring = _check_ring(myself_hg)
-        bond_symbol_list = []
-        for each_node in node_list:
-            bond_symbol_list.append(parent_hg.node_attr(each_node)['symbol'])
-        lhs.add_edge(
-            node_list,
-            attr_dict=dict(
-                terminal=False,
-                edge_exists=edge_exists,
-                symbol=NTSymbol(
-                    degree=len(node_list),
-                    is_aromatic=is_aromatic,
-                    bond_symbol_list=bond_symbol_list,
-                    for_ring=for_ring)))
-        try:
-            lhs = _add_ext_node(lhs, node_list)
-        except ValueError:
-            import pdb; pdb.set_trace()
-    rhs = remove_tmp_edge(deepcopy(myself_hg))
-    #rhs = remove_ext_node(rhs)
-    #rhs = remove_nt_edge(rhs)
-    try:
-        rhs = _add_ext_node(rhs, node_list)
-    except ValueError:
-        import pdb; pdb.set_trace()
-    nt_idx = 0
-    if children_hg_list is not None:
-        for each_child_hg in children_hg_list:
-            node_list, edge_exists = common_node_list(myself_hg, each_child_hg)
-            is_aromatic, _ = _check_aromatic(myself_hg, node_list)
-            for_ring = _check_ring(each_child_hg)
-            bond_symbol_list = []
-            for each_node in node_list:
-                bond_symbol_list.append(myself_hg.node_attr(each_node)['symbol'])
-            rhs.add_edge(
-                node_list,
-                attr_dict=dict(
-                    terminal=False,
-                    nt_idx=nt_idx,
-                    edge_exists=edge_exists,
-                    symbol=NTSymbol(degree=len(node_list),
-                                    is_aromatic=is_aromatic,
-                                    bond_symbol_list=bond_symbol_list,
-                                    for_ring=for_ring)))
-            nt_idx += 1
-    prod_rule = ProductionRule(lhs, rhs)
-    prod_rule.subhg_idx = subhg_idx
-    if DEBUG:
-        if sorted(list(prod_rule.ext_node.keys())) \
-           != list(np.arange(len(prod_rule.ext_node))):
-            raise RuntimeError('ext_id is not continuous')
-    return prod_rule
-def _find_root(clique_tree):
-    max_node = None
-    num_nodes_max = -np.inf
-    for each_node in clique_tree.nodes:
-        if clique_tree.nodes[each_node]['subhg'].num_nodes > num_nodes_max:
-            max_node = each_node
-            num_nodes_max = clique_tree.nodes[each_node]['subhg'].num_nodes
-        '''
-        children = sorted(list(clique_tree[each_node].keys()))
-        prod_rule = extract_prod_rule(None,
-                                      clique_tree.nodes[each_node]["subhg"],
-                                      [clique_tree.nodes[each_child]["subhg"]
-                                       for each_child in children])
-        for each_start_rule in start_rule_list:
-            if prod_rule.is_same(each_start_rule):
-                return each_node
-        '''
-    return max_node
-def remove_ext_node(hg):
-    for each_node in hg.nodes:
-        hg.node_attr(each_node).pop('ext_id', None)
-    return hg
-def remove_nt_edge(hg):
-    remove_edge_list = []
-    for each_edge in hg.edges:
-        if not hg.edge_attr(each_edge)['terminal']:
-            remove_edge_list.append(each_edge)
-    hg.remove_edges(remove_edge_list)
-    return hg
-def remove_tmp_edge(hg):
-    remove_edge_list = []
-    for each_edge in hg.edges:
-        if hg.edge_attr(each_edge).get('tmp', False):
-            remove_edge_list.append(each_edge)
-    hg.remove_edges(remove_edge_list)
-    return hg

graph_grammar/graph_grammar/symbols.py DELETED Viewed

@@ -1,180 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Jan 1 2018"
-from typing import List
-class TSymbol(object):
-    ''' terminal symbol
-    Attributes
-    ----------
-    degree : int
-        the number of nodes in a hyperedge
-    is_aromatic : bool
-        whether or not the hyperedge is in an aromatic ring
-    symbol : str
-        atomic symbol
-    num_explicit_Hs : int
-        the number of hydrogens associated to this hyperedge
-    formal_charge : int
-        charge
-    chirality : int
-        chirality
-    '''
-    def __init__(self, degree, is_aromatic,
-                 symbol, num_explicit_Hs, formal_charge, chirality):
-        self.degree = degree
-        self.is_aromatic = is_aromatic
-        self.symbol = symbol
-        self.num_explicit_Hs = num_explicit_Hs
-        self.formal_charge = formal_charge
-        self.chirality = chirality
-    @property
-    def terminal(self):
-        return True
-    def __eq__(self, other):
-        if not isinstance(other, TSymbol):
-            return False
-        if self.degree != other.degree:
-            return False
-        if self.is_aromatic != other.is_aromatic:
-            return False
-        if self.symbol != other.symbol:
-            return False
-        if self.num_explicit_Hs != other.num_explicit_Hs:
-            return False
-        if self.formal_charge != other.formal_charge:
-            return False
-        if self.chirality != other.chirality:
-            return False
-        return True
-    def __hash__(self):
-        return self.__str__().__hash__()
-    def __str__(self):
-        return f'degree={self.degree}, is_aromatic={self.is_aromatic}, '\
-            f'symbol={self.symbol}, '\
-            f'num_explicit_Hs={self.num_explicit_Hs}, '\
-            f'formal_charge={self.formal_charge}, chirality={self.chirality}'
-class NTSymbol(object):
-    ''' non-terminal symbol
-    Attributes
-    ----------
-    degree : int
-        degree of the hyperedge
-    is_aromatic : bool
-        if True, at least one of the associated bonds must be aromatic.
-    node_aromatic_list : list of bool
-        indicate whether each of the nodes is aromatic or not.
-    bond_type_list : list of int
-        bond type of each node"
-    '''
-    def __init__(self, degree: int, is_aromatic: bool,
-                 bond_symbol_list: list,
-                 for_ring=False):
-        self.degree = degree
-        self.is_aromatic = is_aromatic
-        self.for_ring = for_ring
-        self.bond_symbol_list = bond_symbol_list
-    @property
-    def terminal(self) -> bool:
-        return False
-    @property
-    def symbol(self):
-        return f'NT{self.degree}'
-    def __eq__(self, other) -> bool:
-        if not isinstance(other, NTSymbol):
-            return False
-        if self.degree != other.degree:
-            return False
-        if self.is_aromatic != other.is_aromatic:
-            return False
-        if self.for_ring != other.for_ring:
-            return False
-        if len(self.bond_symbol_list) != len(other.bond_symbol_list):
-            return False
-        for each_idx in range(len(self.bond_symbol_list)):
-            if self.bond_symbol_list[each_idx] != other.bond_symbol_list[each_idx]:
-                return False
-        return True
-    def __hash__(self):
-        return self.__str__().__hash__()
-    def __str__(self) -> str:
-        return f'degree={self.degree}, is_aromatic={self.is_aromatic}, '\
-            f'bond_symbol_list={[str(each_symbol) for each_symbol in self.bond_symbol_list]}'\
-            f'for_ring={self.for_ring}'
-class BondSymbol(object):
-    ''' Bond symbol
-    Attributes
-    ----------
-    is_aromatic : bool
-        if True, at least one of the associated bonds must be aromatic.
-    bond_type : int
-        bond type of each node"
-    '''
-    def __init__(self, is_aromatic: bool,
-                 bond_type: int,
-                 stereo: int):
-        self.is_aromatic = is_aromatic
-        self.bond_type = bond_type
-        self.stereo = stereo
-    def __eq__(self, other) -> bool:
-        if not isinstance(other, BondSymbol):
-            return False
-        if self.is_aromatic != other.is_aromatic:
-            return False
-        if self.bond_type != other.bond_type:
-            return False
-        if self.stereo != other.stereo:
-            return False
-        return True
-    def __hash__(self):
-        return self.__str__().__hash__()
-    def __str__(self) -> str:
-        return f'is_aromatic={self.is_aromatic}, '\
-            f'bond_type={self.bond_type}, '\
-            f'stereo={self.stereo}, '

graph_grammar/graph_grammar/utils.py DELETED Viewed

@@ -1,130 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Jun 4 2018"
-from ..hypergraph import Hypergraph
-from copy import deepcopy
-from typing import List
-import numpy as np
-def common_node_list(hg1: Hypergraph, hg2: Hypergraph) -> List[str]:
-    """ return a list of common nodes
-    Parameters
-    ----------
-    hg1, hg2 : Hypergraph
-    Returns
-    -------
-    list of str
-        list of common nodes
-    """
-    if hg1 is None or hg2 is None:
-        return [], False
-    else:
-        node_set = hg1.nodes.intersection(hg2.nodes)
-        node_dict = {}
-        if 'order4hrg' in hg1.node_attr(list(hg1.nodes)[0]):
-            for each_node in node_set:
-                node_dict[each_node] = hg1.node_attr(each_node)['order4hrg']
-        else:
-            for each_node in node_set:
-                node_dict[each_node] = hg1.node_attr(each_node)['symbol'].__hash__()
-        node_list = []
-        for each_key, _ in sorted(node_dict.items(), key=lambda x:x[1]):
-            node_list.append(each_key)
-        edge_name = hg1.has_edge(node_list, ignore_order=True)
-        if edge_name:
-            if not hg1.edge_attr(edge_name).get('terminal', True):
-                node_list = hg1.nodes_in_edge(edge_name)
-            return node_list, True
-        else:
-            return node_list, False
-def _node_match(node1, node2):
-    # if the nodes are hyperedges, `atom_attr` determines the match
-    if node1['bipartite'] == 'edge' and node2['bipartite'] == 'edge':
-        return node1["attr_dict"]['symbol'] == node2["attr_dict"]['symbol']
-    elif node1['bipartite'] == 'node' and node2['bipartite'] == 'node':
-        # bond_symbol
-        return node1['attr_dict']['symbol'] == node2['attr_dict']['symbol']
-    else:
-        return False
-def _easy_node_match(node1, node2):
-    # if the nodes are hyperedges, `atom_attr` determines the match
-    if node1['bipartite'] == 'edge' and node2['bipartite'] == 'edge':
-        return node1["attr_dict"].get('symbol', None) == node2["attr_dict"].get('symbol', None)
-    elif node1['bipartite'] == 'node' and node2['bipartite'] == 'node':
-        # bond_symbol
-        return node1['attr_dict'].get('ext_id', -1) == node2['attr_dict'].get('ext_id', -1)\
-            and node1['attr_dict']['symbol'] == node2['attr_dict']['symbol']
-    else:
-        return False
-def _node_match_prod_rule(node1, node2, ignore_order=False):
-    # if the nodes are hyperedges, `atom_attr` determines the match
-    if node1['bipartite'] == 'edge' and node2['bipartite'] == 'edge':
-        return node1["attr_dict"]['symbol'] == node2["attr_dict"]['symbol']
-    elif node1['bipartite'] == 'node' and node2['bipartite'] == 'node':
-        # ext_id, order4hrg, bond_symbol
-        if ignore_order:
-            return node1['attr_dict']['symbol'] == node2['attr_dict']['symbol']
-        else:
-            return node1['attr_dict']['symbol'] == node2['attr_dict']['symbol']\
-                and node1['attr_dict'].get('ext_id', -1) == node2['attr_dict'].get('ext_id', -1)
-    else:
-        return False
-def _edge_match(edge1, edge2, ignore_order=False):
-    #return True
-    if ignore_order:
-        return True
-    else:
-        return edge1["order"] == edge2["order"]
-def masked_softmax(logit, mask):
-    ''' compute a probability distribution from logit
-    Parameters
-    ----------
-    logit : array-like, length D
-        each element indicates how each dimension is likely to be chosen
-        (the larger, the more likely)
-    mask : array-like, length D
-        each element is either 0 or 1.
-        if 0, the dimension is ignored
-        when computing the probability distribution.
-    Returns
-    -------
-    prob_dist : array, length D
-        probability distribution computed from logit.
-        if `mask[d] = 0`, `prob_dist[d] = 0`.
-    '''
-    if logit.shape != mask.shape:
-        raise ValueError('logit and mask must have the same shape')
-    c = np.max(logit)
-    exp_logit = np.exp(logit - c) * mask
-    sum_exp_logit = exp_logit @ mask
-    return exp_logit / sum_exp_logit

graph_grammar/hypergraph.py DELETED Viewed

@@ -1,544 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Jan 31 2018"
-from copy import deepcopy
-from typing import List, Dict, Tuple
-import networkx as nx
-import numpy as np
-import os
-class Hypergraph(object):
-    '''
-    A class of a hypergraph.
-    Each hyperedge can be ordered. For the ordered case,
-    edges adjacent to the hyperedge node are labeled by their orders.
-    Attributes
-    ----------
-    hg : nx.Graph
-        a bipartite graph representation of a hypergraph
-    edge_idx : int
-        total number of hyperedges that exist so far
-    '''
-    def __init__(self):
-        self.hg = nx.Graph()
-        self.edge_idx = 0
-        self.nodes = set([])
-        self.num_nodes = 0
-        self.edges = set([])
-        self.num_edges = 0
-        self.nodes_in_edge_dict = {}
-    def add_node(self, node: str, attr_dict=None):
-        ''' add a node to hypergraph
-        Parameters
-        ----------
-        node : str
-            node name
-        attr_dict : dict
-            dictionary of node attributes
-        '''
-        self.hg.add_node(node, bipartite='node', attr_dict=attr_dict)
-        if node not in self.nodes:
-            self.num_nodes += 1
-        self.nodes.add(node)
-    def add_edge(self, node_list: List[str], attr_dict=None, edge_name=None):
-        ''' add an edge consisting of nodes `node_list`
-        Parameters
-        ----------
-        node_list : list
-            ordered list of nodes that consist the edge
-        attr_dict : dict
-            dictionary of edge attributes
-        '''
-        if edge_name is None:
-            edge = 'e{}'.format(self.edge_idx)
-        else:
-            assert edge_name not in self.edges
-            edge = edge_name
-        self.hg.add_node(edge, bipartite='edge', attr_dict=attr_dict)
-        if edge not in self.edges:
-            self.num_edges += 1
-        self.edges.add(edge)
-        self.nodes_in_edge_dict[edge] = node_list
-        if type(node_list) == list:
-            for node_idx, each_node in enumerate(node_list):
-                self.hg.add_edge(edge, each_node, order=node_idx)
-                if each_node not in self.nodes:
-                    self.num_nodes += 1
-                self.nodes.add(each_node)
-        elif type(node_list) == set:
-            for each_node in node_list:
-                self.hg.add_edge(edge, each_node, order=-1)
-                if each_node not in self.nodes:
-                    self.num_nodes += 1
-                self.nodes.add(each_node)
-        else:
-            raise ValueError
-        self.edge_idx += 1
-        return edge
-    def remove_node(self, node: str, remove_connected_edges=True):
-        ''' remove a node
-        Parameters
-        ----------
-        node : str
-            node name
-        remove_connected_edges : bool
-            if True, remove edges that are adjacent to the node
-        '''
-        if remove_connected_edges:
-            connected_edges = deepcopy(self.adj_edges(node))
-            for each_edge in connected_edges:
-                self.remove_edge(each_edge)
-        self.hg.remove_node(node)
-        self.num_nodes -= 1
-        self.nodes.remove(node)
-    def remove_nodes(self, node_iter, remove_connected_edges=True):
-        ''' remove a set of nodes
-        Parameters
-        ----------
-        node_iter : iterator of strings
-            nodes to be removed
-        remove_connected_edges : bool
-            if True, remove edges that are adjacent to the node
-        '''
-        for each_node in node_iter:
-            self.remove_node(each_node, remove_connected_edges)
-    def remove_edge(self, edge: str):
-        ''' remove an edge
-        Parameters
-        ----------
-        edge : str
-            edge to be removed
-        '''
-        self.hg.remove_node(edge)
-        self.edges.remove(edge)
-        self.num_edges -= 1
-        self.nodes_in_edge_dict.pop(edge)
-    def remove_edges(self, edge_iter):
-        ''' remove a set of edges
-        Parameters
-        ----------
-        edge_iter : iterator of strings
-            edges to be removed
-        '''
-        for each_edge in edge_iter:
-            self.remove_edge(each_edge)
-    def remove_edges_with_attr(self, edge_attr_dict):
-        remove_edge_list = []
-        for each_edge in self.edges:
-            satisfy = True
-            for each_key, each_val in edge_attr_dict.items():
-                if not satisfy:
-                    break
-                try:
-                    if self.edge_attr(each_edge)[each_key] != each_val:
-                        satisfy = False
-                except KeyError:
-                    satisfy = False
-            if satisfy:
-                remove_edge_list.append(each_edge)
-        self.remove_edges(remove_edge_list)
-    def remove_subhg(self, subhg):
-        ''' remove subhypergraph.
-        all of the hyperedges are removed.
-        each node of subhg is removed if its degree becomes 0 after removing hyperedges.
-        Parameters
-        ----------
-        subhg : Hypergraph
-        '''
-        for each_edge in subhg.edges:
-            self.remove_edge(each_edge)
-        for each_node in subhg.nodes:
-            if self.degree(each_node) == 0:
-                self.remove_node(each_node)
-    def nodes_in_edge(self, edge):
-        ''' return an ordered list of nodes in a given edge.
-        Parameters
-        ----------
-        edge : str
-            edge whose nodes are returned
-        Returns
-        -------
-        list or set
-            ordered list or set of nodes that belong to the edge
-        '''
-        if edge.startswith('e'):
-            return self.nodes_in_edge_dict[edge]
-        else:
-            adj_node_list = self.hg.adj[edge]
-            adj_node_order_list = []
-            adj_node_name_list = []
-            for each_node in adj_node_list:
-                adj_node_order_list.append(adj_node_list[each_node]['order'])
-                adj_node_name_list.append(each_node)
-            if adj_node_order_list == [-1] * len(adj_node_order_list):
-                return set(adj_node_name_list)
-            else:
-                return [adj_node_name_list[each_idx] for each_idx
-                        in np.argsort(adj_node_order_list)]
-    def adj_edges(self, node):
-        ''' return a dict of adjacent hyperedges
-        Parameters
-        ----------
-        node : str
-        Returns
-        -------
-        set
-            set of edges that are adjacent to `node`
-        '''
-        return self.hg.adj[node]
-    def adj_nodes(self, node):
-        ''' return a set of adjacent nodes
-        Parameters
-        ----------
-        node : str
-        Returns
-        -------
-        set
-            set of nodes that are adjacent to `node`
-        '''
-        node_set = set([])
-        for each_adj_edge in self.adj_edges(node):
-            node_set.update(set(self.nodes_in_edge(each_adj_edge)))
-        node_set.discard(node)
-        return node_set
-    def has_edge(self, node_list, ignore_order=False):
-        for each_edge in self.edges:
-            if ignore_order:
-                if set(self.nodes_in_edge(each_edge)) == set(node_list):
-                    return each_edge
-            else:
-                if self.nodes_in_edge(each_edge) == node_list:
-                    return each_edge
-        return False
-    def degree(self, node):
-        return len(self.hg.adj[node])
-    def degrees(self):
-        return {each_node: self.degree(each_node) for each_node in self.nodes}
-    def edge_degree(self, edge):
-        return len(self.nodes_in_edge(edge))
-    def edge_degrees(self):
-        return {each_edge: self.edge_degree(each_edge) for each_edge in self.edges}
-    def is_adj(self, node1, node2):
-        return node1 in self.adj_nodes(node2)
-    def adj_subhg(self, node, ident_node_dict=None):
-        """ return a subhypergraph consisting of a set of nodes and hyperedges adjacent to `node`.
-        if an adjacent node has a self-loop hyperedge, it will be also added to the subhypergraph.
-        Parameters
-        ----------
-        node : str
-        ident_node_dict : dict
-            dict containing identical nodes. see `get_identical_node_dict` for more details
-        Returns
-        -------
-        subhg : Hypergraph
-        """
-        if ident_node_dict is None:
-            ident_node_dict = self.get_identical_node_dict()
-        adj_node_set = set(ident_node_dict[node])
-        adj_edge_set = set([])
-        for each_node in ident_node_dict[node]:
-            adj_edge_set.update(set(self.adj_edges(each_node)))
-        fixed_adj_edge_set = deepcopy(adj_edge_set)
-        for each_edge in fixed_adj_edge_set:
-            other_nodes = self.nodes_in_edge(each_edge)
-            adj_node_set.update(other_nodes)
-            # if the adjacent node has self-loop edge, it will be appended to adj_edge_list.
-            for each_node in other_nodes:
-                for other_edge in set(self.adj_edges(each_node)) - set([each_edge]):
-                    if len(set(self.nodes_in_edge(other_edge)) \
-                           - set(self.nodes_in_edge(each_edge))) == 0:
-                        adj_edge_set.update(set([other_edge]))
-        subhg = Hypergraph()
-        for each_node in adj_node_set:
-            subhg.add_node(each_node, attr_dict=self.node_attr(each_node))
-        for each_edge in adj_edge_set:
-            subhg.add_edge(self.nodes_in_edge(each_edge),
-                           attr_dict=self.edge_attr(each_edge),
-                           edge_name=each_edge)
-        subhg.edge_idx = self.edge_idx
-        return subhg
-    def get_subhg(self, node_list, edge_list, ident_node_dict=None):
-        """ return a subhypergraph consisting of a set of nodes and hyperedges adjacent to `node`.
-        if an adjacent node has a self-loop hyperedge, it will be also added to the subhypergraph.
-        Parameters
-        ----------
-        node : str
-        ident_node_dict : dict
-            dict containing identical nodes. see `get_identical_node_dict` for more details
-        Returns
-        -------
-        subhg : Hypergraph
-        """
-        if ident_node_dict is None:
-            ident_node_dict = self.get_identical_node_dict()
-        adj_node_set = set([])
-        for each_node in node_list:
-            adj_node_set.update(set(ident_node_dict[each_node]))
-        adj_edge_set = set(edge_list)
-        subhg = Hypergraph()
-        for each_node in adj_node_set:
-            subhg.add_node(each_node,
-                           attr_dict=deepcopy(self.node_attr(each_node)))
-        for each_edge in adj_edge_set:
-            subhg.add_edge(self.nodes_in_edge(each_edge),
-                           attr_dict=deepcopy(self.edge_attr(each_edge)),
-                           edge_name=each_edge)
-        subhg.edge_idx = self.edge_idx
-        return subhg
-    def copy(self):
-        ''' return a copy of the object
-        Returns
-        -------
-        Hypergraph
-        '''
-        return deepcopy(self)
-    def node_attr(self, node):
-        return self.hg.nodes[node]['attr_dict']
-    def edge_attr(self, edge):
-        return self.hg.nodes[edge]['attr_dict']
-    def set_node_attr(self, node, attr_dict):
-        for each_key, each_val in attr_dict.items():
-            self.hg.nodes[node]['attr_dict'][each_key] = each_val
-    def set_edge_attr(self, edge, attr_dict):
-        for each_key, each_val in attr_dict.items():
-            self.hg.nodes[edge]['attr_dict'][each_key] = each_val
-    def get_identical_node_dict(self):
-        ''' get identical nodes
-        nodes are identical if they share the same set of adjacent edges.
-        Returns
-        -------
-        ident_node_dict : dict
-            ident_node_dict[node] returns a list of nodes that are identical to `node`.
-        '''
-        ident_node_dict = {}
-        for each_node in self.nodes:
-            ident_node_list = []
-            for each_other_node in self.nodes:
-                if each_other_node == each_node:
-                    ident_node_list.append(each_other_node)
-                elif self.adj_edges(each_node) == self.adj_edges(each_other_node) \
-                   and len(self.adj_edges(each_node)) != 0:
-                    ident_node_list.append(each_other_node)
-            ident_node_dict[each_node] = ident_node_list
-        return ident_node_dict
-    '''
-        ident_node_dict = {}
-        for each_node in self.nodes:
-            ident_node_dict[each_node] = [each_node]
-        return ident_node_dict
-    '''
-    def get_leaf_edge(self):
-        ''' get an edge that is incident only to one edge
-        Returns
-        -------
-        if exists, return a leaf edge. otherwise, return None.
-        '''
-        for each_edge in self.edges:
-            if len(self.adj_nodes(each_edge)) == 1:
-                if 'tmp' not in self.edge_attr(each_edge):
-                    return each_edge
-        return None
-    def get_nontmp_edge(self):
-        for each_edge in self.edges:
-            if 'tmp' not in self.edge_attr(each_edge):
-                return each_edge
-        return None
-    def is_subhg(self, hg):
-        ''' return whether this hypergraph is a subhypergraph of `hg`
-        Returns
-        -------
-        True if self \in hg,
-        False otherwise.
-        '''
-        for each_node in self.nodes:
-            if each_node not in hg.nodes:
-                return False
-        for each_edge in self.edges:
-            if each_edge not in hg.edges:
-                return False
-        return True
-    def in_cycle(self, node, visited=None, parent='', root_node='') -> bool:
-        ''' if `node` is in a cycle, then return True. otherwise, False.
-        Parameters
-        ----------
-        node : str
-            node in a hypergraph
-        visited : list
-            list of visited nodes, used for recursion
-        parent : str
-            parent node, used to eliminate a cycle consisting of two nodes and one edge.
-        Returns
-        -------
-        bool
-        '''
-        if visited is None:
-            visited = []
-        if parent == '':
-            visited = []
-        if root_node == '':
-            root_node = node
-        visited.append(node)
-        for each_adj_node in self.adj_nodes(node):
-            if each_adj_node not in visited:
-                if self.in_cycle(each_adj_node, visited, node, root_node):
-                    return True
-            elif each_adj_node != parent and each_adj_node == root_node:
-                return True
-        return False
-    def draw(self, file_path=None, with_node=False, with_edge_name=False):
-        ''' draw hypergraph
-        '''
-        import graphviz
-        G = graphviz.Graph(format='png')
-        for each_node in self.nodes:
-            if 'ext_id' in self.node_attr(each_node):
-                G.node(each_node, label='',
-                       shape='circle', width='0.1', height='0.1', style='filled',
-                       fillcolor='black')
-            else:
-                if with_node:
-                    G.node(each_node, label='',
-                           shape='circle', width='0.1', height='0.1', style='filled',
-                           fillcolor='gray')
-        edge_list = []
-        for each_edge in self.edges:
-            if self.edge_attr(each_edge).get('terminal', False):
-                G.node(each_edge,
-                       label=self.edge_attr(each_edge)['symbol'].symbol if not with_edge_name \
-                       else self.edge_attr(each_edge)['symbol'].symbol + ', ' + each_edge,
-                       fontcolor='black', shape='square')
-            elif self.edge_attr(each_edge).get('tmp', False):
-                G.node(each_edge, label='tmp' if not with_edge_name else 'tmp, ' + each_edge,
-                       fontcolor='black', shape='square')
-            else:
-                G.node(each_edge,
-                       label=self.edge_attr(each_edge)['symbol'].symbol if not with_edge_name \
-                       else self.edge_attr(each_edge)['symbol'].symbol + ', ' + each_edge,
-                       fontcolor='black', shape='square', style='filled')
-            if with_node:
-                for each_node in self.nodes_in_edge(each_edge):
-                    G.edge(each_edge, each_node)
-            else:
-                for each_node in self.nodes_in_edge(each_edge):
-                    if 'ext_id' in self.node_attr(each_node)\
-                       and set([each_node, each_edge]) not in edge_list:
-                        G.edge(each_edge, each_node)
-                        edge_list.append(set([each_node, each_edge]))
-                for each_other_edge in self.adj_nodes(each_edge):
-                    if set([each_edge, each_other_edge]) not in edge_list:
-                        num_bond = 0
-                        common_node_set = set(self.nodes_in_edge(each_edge))\
-                                          .intersection(set(self.nodes_in_edge(each_other_edge)))
-                        for each_node in common_node_set:
-                            if self.node_attr(each_node)['symbol'].bond_type in [1, 2, 3]:
-                                num_bond += self.node_attr(each_node)['symbol'].bond_type
-                            elif self.node_attr(each_node)['symbol'].bond_type in [12]:
-                                num_bond += 1
-                            else:
-                                raise NotImplementedError('unsupported bond type')
-                        for _ in range(num_bond):
-                            G.edge(each_edge, each_other_edge)
-                        edge_list.append(set([each_edge, each_other_edge]))
-        if file_path is not None:
-            G.render(file_path, cleanup=True)
-            #os.remove(file_path)
-        return G
-    def is_dividable(self, node):
-        _hg = deepcopy(self.hg)
-        _hg.remove_node(node)
-        return (not nx.is_connected(_hg))
-    def divide(self, node):
-        subhg_list = []
-        hg_wo_node = deepcopy(self)
-        hg_wo_node.remove_node(node, remove_connected_edges=False)
-        connected_components = nx.connected_components(hg_wo_node.hg)
-        for each_component in connected_components:
-            node_list = [node]
-            edge_list = []
-            node_list.extend([each_node for each_node in each_component
-                              if each_node.startswith('bond_')])
-            edge_list.extend([each_edge for each_edge in each_component
-                              if each_edge.startswith('e')])
-            subhg_list.append(self.get_subhg(node_list, edge_list))
-            #subhg_list[-1].set_node_attr(node, {'divided': True})
-        return subhg_list

graph_grammar/io/__init__.py DELETED Viewed

@@ -1,20 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Jan 1 2018"

graph_grammar/io/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (669 Bytes)

graph_grammar/io/__pycache__/smi.cpython-310.pyc DELETED Viewed

Binary file (12.9 kB)

graph_grammar/io/smi.py DELETED Viewed

@@ -1,559 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Jan 12 2018"
-from copy import deepcopy
-from rdkit import Chem
-from rdkit import RDLogger
-import networkx as nx
-import numpy as np
-from ..hypergraph import Hypergraph
-from ..graph_grammar.symbols import TSymbol, BondSymbol
-# supress warnings
-lg = RDLogger.logger()
-lg.setLevel(RDLogger.CRITICAL)
-class HGGen(object):
-    """
-    load .smi file and yield a hypergraph.
-    Attributes
-    ----------
-    path_to_file : str
-        path to .smi file
-    kekulize : bool
-        kekulize or not
-    add_Hs : bool
-        add implicit hydrogens to the molecule or not.
-    all_single : bool
-        if True, all multiple bonds are summarized into a single bond with some attributes
-    Yields
-    ------
-    Hypergraph
-    """
-    def __init__(self, path_to_file, kekulize=True, add_Hs=False, all_single=True):
-        self.num_line = 1
-        self.mol_gen = Chem.SmilesMolSupplier(path_to_file, titleLine=False)
-        self.kekulize = kekulize
-        self.add_Hs = add_Hs
-        self.all_single = all_single
-    def __iter__(self):
-        return self
-    def __next__(self):
-        '''
-        each_mol = None
-        while each_mol is None:
-            each_mol = next(self.mol_gen)
-        '''
-        # not ignoring parse errors
-        each_mol = next(self.mol_gen)
-        if each_mol is None:
-            raise ValueError(f'incorrect smiles in line {self.num_line}')
-        else:
-            self.num_line += 1
-        return mol_to_hg(each_mol, self.kekulize, self.add_Hs)
-def mol_to_bipartite(mol, kekulize):
-    """
-    get a bipartite representation of a molecule.
-    Parameters
-    ----------
-    mol : rdkit.Chem.rdchem.Mol
-        molecule object
-    Returns
-    -------
-    nx.Graph
-        a bipartite graph representing which bond is connected to which atoms.
-    """
-    try:
-        mol = standardize_stereo(mol)
-    except KeyError:
-        print(Chem.MolToSmiles(mol))
-        raise KeyError
-    if kekulize:
-        Chem.Kekulize(mol)
-    bipartite_g = nx.Graph()
-    for each_atom in mol.GetAtoms():
-        bipartite_g.add_node(f"atom_{each_atom.GetIdx()}",
-                             atom_attr=atom_attr(each_atom, kekulize))
-    for each_bond in mol.GetBonds():
-        bond_idx = each_bond.GetIdx()
-        bipartite_g.add_node(
-            f"bond_{bond_idx}",
-            bond_attr=bond_attr(each_bond, kekulize))
-        bipartite_g.add_edge(
-            f"atom_{each_bond.GetBeginAtomIdx()}",
-            f"bond_{bond_idx}")
-        bipartite_g.add_edge(
-            f"atom_{each_bond.GetEndAtomIdx()}",
-            f"bond_{bond_idx}")
-    return bipartite_g
-def mol_to_hg(mol, kekulize, add_Hs):
-    """
-    get a bipartite representation of a molecule.
-    Parameters
-    ----------
-    mol : rdkit.Chem.rdchem.Mol
-        molecule object
-    kekulize : bool
-        kekulize or not
-    add_Hs : bool
-        add implicit hydrogens to the molecule or not.
-    Returns
-    -------
-    Hypergraph
-    """
-    if add_Hs:
-        mol = Chem.AddHs(mol)
-    if kekulize:
-        Chem.Kekulize(mol)
-    bipartite_g = mol_to_bipartite(mol, kekulize)
-    hg = Hypergraph()
-    for each_atom in [each_node for each_node in bipartite_g.nodes()
-                      if each_node.startswith('atom_')]:
-        node_set = set([])
-        for each_bond in bipartite_g.adj[each_atom]:
-            hg.add_node(each_bond,
-                        attr_dict=bipartite_g.nodes[each_bond]['bond_attr'])
-            node_set.add(each_bond)
-        hg.add_edge(node_set,
-                    attr_dict=bipartite_g.nodes[each_atom]['atom_attr'])
-    return hg
-def hg_to_mol(hg, verbose=False):
-    """ convert a hypergraph into Mol object
-    Parameters
-    ----------
-    hg : Hypergraph
-    Returns
-    -------
-    mol : Chem.RWMol
-    """
-    mol = Chem.RWMol()
-    atom_dict = {}
-    bond_set = set([])
-    for each_edge in hg.edges:
-        atom = Chem.Atom(hg.edge_attr(each_edge)['symbol'].symbol)
-        atom.SetNumExplicitHs(hg.edge_attr(each_edge)['symbol'].num_explicit_Hs)
-        atom.SetFormalCharge(hg.edge_attr(each_edge)['symbol'].formal_charge)
-        atom.SetChiralTag(
-            Chem.rdchem.ChiralType.values[
-                hg.edge_attr(each_edge)['symbol'].chirality])
-        atom_idx = mol.AddAtom(atom)
-        atom_dict[each_edge] = atom_idx
-    for each_node in hg.nodes:
-        edge_1, edge_2 = hg.adj_edges(each_node)
-        if edge_1+edge_2 not in bond_set:
-            if hg.node_attr(each_node)['symbol'].bond_type <= 3:
-                num_bond = hg.node_attr(each_node)['symbol'].bond_type
-            elif hg.node_attr(each_node)['symbol'].bond_type == 12:
-                num_bond = 1
-            else:
-                raise ValueError(f'too many bonds; {hg.node_attr(each_node)["bond_symbol"].bond_type}')
-            _ = mol.AddBond(atom_dict[edge_1],
-                            atom_dict[edge_2],
-                            order=Chem.rdchem.BondType.values[num_bond])
-            bond_idx = mol.GetBondBetweenAtoms(atom_dict[edge_1], atom_dict[edge_2]).GetIdx()
-            # stereo
-            mol.GetBondWithIdx(bond_idx).SetStereo(
-                Chem.rdchem.BondStereo.values[hg.node_attr(each_node)['symbol'].stereo])
-            bond_set.update([edge_1+edge_2])
-            bond_set.update([edge_2+edge_1])
-    mol.UpdatePropertyCache()
-    mol = mol.GetMol()
-    not_stereo_mol = deepcopy(mol)
-    if Chem.MolFromSmiles(Chem.MolToSmiles(not_stereo_mol)) is None:
-        raise RuntimeError('no valid molecule was obtained.')
-    try:
-        mol = set_stereo(mol)
-        is_stereo = True
-    except:
-        import traceback
-        traceback.print_exc()
-        is_stereo = False
-    mol_tmp = deepcopy(mol)
-    Chem.SetAromaticity(mol_tmp)
-    if Chem.MolFromSmiles(Chem.MolToSmiles(mol_tmp)) is not None:
-        mol = mol_tmp
-    else:
-        if Chem.MolFromSmiles(Chem.MolToSmiles(mol)) is None:
-            mol = not_stereo_mol
-    mol.UpdatePropertyCache()
-    Chem.GetSymmSSSR(mol)
-    mol = Chem.MolFromSmiles(Chem.MolToSmiles(mol))
-    if verbose:
-        return mol, is_stereo
-    else:
-        return mol
-def hgs_to_mols(hg_list, ignore_error=False):
-    if ignore_error:
-        mol_list = []
-        for each_hg in hg_list:
-            try:
-                mol = hg_to_mol(each_hg)
-            except:
-                mol = None
-            mol_list.append(mol)
-    else:
-        mol_list = [hg_to_mol(each_hg) for each_hg in hg_list]
-    return mol_list
-def hgs_to_smiles(hg_list, ignore_error=False):
-    mol_list = hgs_to_mols(hg_list, ignore_error)
-    smiles_list = []
-    for each_mol in mol_list:
-        try:
-            smiles_list.append(
-                Chem.MolToSmiles(
-                    Chem.MolFromSmiles(
-                        Chem.MolToSmiles(
-                            each_mol))))
-        except:
-            smiles_list.append(None)
-    return smiles_list
-def atom_attr(atom, kekulize):
-    """
-    get atom's attributes
-    Parameters
-    ----------
-    atom : rdkit.Chem.rdchem.Atom
-    kekulize : bool
-        kekulize or not
-    Returns
-    -------
-    atom_attr : dict
-        "is_aromatic" : bool
-            the atom is aromatic or not.
-        "smarts" : str
-            SMARTS representation of the atom.
-    """
-    if kekulize:
-        return {'terminal': True,
-                'is_in_ring': atom.IsInRing(),
-                'symbol': TSymbol(degree=0,
-                                  #degree=atom.GetTotalDegree(),
-                                  is_aromatic=False,
-                                  symbol=atom.GetSymbol(),
-                                  num_explicit_Hs=atom.GetNumExplicitHs(),
-                                  formal_charge=atom.GetFormalCharge(),
-                                  chirality=atom.GetChiralTag().real
-                )}
-    else:
-        return {'terminal': True,
-                'is_in_ring': atom.IsInRing(),
-                'symbol': TSymbol(degree=0,
-                                  #degree=atom.GetTotalDegree(),
-                                  is_aromatic=atom.GetIsAromatic(),
-                                  symbol=atom.GetSymbol(),
-                                  num_explicit_Hs=atom.GetNumExplicitHs(),
-                                  formal_charge=atom.GetFormalCharge(),
-                                  chirality=atom.GetChiralTag().real
-                )}
-def bond_attr(bond, kekulize):
-    """
-    get atom's attributes
-    Parameters
-    ----------
-    bond : rdkit.Chem.rdchem.Bond
-    kekulize : bool
-        kekulize or not
-    Returns
-    -------
-    bond_attr : dict
-        "bond_type" : int
-        {0: rdkit.Chem.rdchem.BondType.UNSPECIFIED,
-         1: rdkit.Chem.rdchem.BondType.SINGLE,
-         2: rdkit.Chem.rdchem.BondType.DOUBLE,
-         3: rdkit.Chem.rdchem.BondType.TRIPLE,
-         4: rdkit.Chem.rdchem.BondType.QUADRUPLE,
-         5: rdkit.Chem.rdchem.BondType.QUINTUPLE,
-         6: rdkit.Chem.rdchem.BondType.HEXTUPLE,
-         7: rdkit.Chem.rdchem.BondType.ONEANDAHALF,
-         8: rdkit.Chem.rdchem.BondType.TWOANDAHALF,
-         9: rdkit.Chem.rdchem.BondType.THREEANDAHALF,
-         10: rdkit.Chem.rdchem.BondType.FOURANDAHALF,
-         11: rdkit.Chem.rdchem.BondType.FIVEANDAHALF,
-         12: rdkit.Chem.rdchem.BondType.AROMATIC,
-         13: rdkit.Chem.rdchem.BondType.IONIC,
-         14: rdkit.Chem.rdchem.BondType.HYDROGEN,
-         15: rdkit.Chem.rdchem.BondType.THREECENTER,
-         16: rdkit.Chem.rdchem.BondType.DATIVEONE,
-         17: rdkit.Chem.rdchem.BondType.DATIVE,
-         18: rdkit.Chem.rdchem.BondType.DATIVEL,
-         19: rdkit.Chem.rdchem.BondType.DATIVER,
-         20: rdkit.Chem.rdchem.BondType.OTHER,
-         21: rdkit.Chem.rdchem.BondType.ZERO}
-    """
-    if kekulize:
-        is_aromatic = False
-        if bond.GetBondType().real == 12:
-            bond_type = 1
-        else:
-            bond_type = bond.GetBondType().real
-    else:
-        is_aromatic = bond.GetIsAromatic()
-        bond_type = bond.GetBondType().real
-    return {'symbol': BondSymbol(is_aromatic=is_aromatic,
-                                 bond_type=bond_type,
-                                 stereo=int(bond.GetStereo())),
-            'is_in_ring': bond.IsInRing()}
-def standardize_stereo(mol):
-    '''
- 0: rdkit.Chem.rdchem.BondDir.NONE,
- 1: rdkit.Chem.rdchem.BondDir.BEGINWEDGE,
- 2: rdkit.Chem.rdchem.BondDir.BEGINDASH,
- 3: rdkit.Chem.rdchem.BondDir.ENDDOWNRIGHT,
- 4: rdkit.Chem.rdchem.BondDir.ENDUPRIGHT,
-    '''
-    # mol = Chem.AddHs(mol) # this removes CIPRank !!!
-    for each_bond in mol.GetBonds():
-        if int(each_bond.GetStereo()) in [2, 3]: #2=Z (same side), 3=E
-            begin_stereo_atom_idx = each_bond.GetBeginAtomIdx()
-            end_stereo_atom_idx = each_bond.GetEndAtomIdx()
-            atom_idx_1 = each_bond.GetStereoAtoms()[0]
-            atom_idx_2 = each_bond.GetStereoAtoms()[1]
-            if mol.GetBondBetweenAtoms(atom_idx_1, begin_stereo_atom_idx):
-                begin_atom_idx = atom_idx_1
-                end_atom_idx = atom_idx_2
-            else:
-                begin_atom_idx = atom_idx_2
-                end_atom_idx = atom_idx_1
-            begin_another_atom_idx = None
-            assert len(mol.GetAtomWithIdx(begin_stereo_atom_idx).GetNeighbors()) <= 3
-            for each_neighbor in mol.GetAtomWithIdx(begin_stereo_atom_idx).GetNeighbors():
-                each_neighbor_idx = each_neighbor.GetIdx()
-                if each_neighbor_idx not in [end_stereo_atom_idx, begin_atom_idx]:
-                    begin_another_atom_idx = each_neighbor_idx
-            end_another_atom_idx = None
-            assert len(mol.GetAtomWithIdx(end_stereo_atom_idx).GetNeighbors()) <= 3
-            for each_neighbor in mol.GetAtomWithIdx(end_stereo_atom_idx).GetNeighbors():
-                each_neighbor_idx = each_neighbor.GetIdx()
-                if each_neighbor_idx not in [begin_stereo_atom_idx, end_atom_idx]:
-                    end_another_atom_idx = each_neighbor_idx
-            '''
-            relationship between begin_atom_idx and end_atom_idx is encoded in GetStereo
-            '''
-            begin_atom_rank = int(mol.GetAtomWithIdx(begin_atom_idx).GetProp('_CIPRank'))
-            end_atom_rank = int(mol.GetAtomWithIdx(end_atom_idx).GetProp('_CIPRank'))
-            try:
-                begin_another_atom_rank = int(mol.GetAtomWithIdx(begin_another_atom_idx).GetProp('_CIPRank'))
-            except:
-                begin_another_atom_rank = np.inf
-            try:
-                end_another_atom_rank = int(mol.GetAtomWithIdx(end_another_atom_idx).GetProp('_CIPRank'))
-            except:
-                end_another_atom_rank = np.inf
-            if begin_atom_rank < begin_another_atom_rank\
-               and end_atom_rank < end_another_atom_rank:
-                pass
-            elif begin_atom_rank < begin_another_atom_rank\
-                 and end_atom_rank > end_another_atom_rank:
-                # (begin_atom_idx +) end_another_atom_idx should be in StereoAtoms
-                if each_bond.GetStereo() == 2:
-                    # set stereo
-                    each_bond.SetStereo(Chem.rdchem.BondStereo.values[3])
-                    # set bond dir
-                    mol = safe_set_bond_dir(mol, begin_atom_idx, begin_stereo_atom_idx, 3)
-                    mol = safe_set_bond_dir(mol, begin_another_atom_idx, begin_stereo_atom_idx, 0)
-                    mol = safe_set_bond_dir(mol, end_atom_idx, end_stereo_atom_idx, 0)
-                    mol = safe_set_bond_dir(mol, end_another_atom_idx, end_stereo_atom_idx, 3)
-                elif each_bond.GetStereo() == 3:
-                    # set stereo
-                    each_bond.SetStereo(Chem.rdchem.BondStereo.values[2])
-                    # set bond dir
-                    mol = safe_set_bond_dir(mol, begin_atom_idx, begin_stereo_atom_idx, 3)
-                    mol = safe_set_bond_dir(mol, begin_another_atom_idx, begin_stereo_atom_idx, 0)
-                    mol = safe_set_bond_dir(mol, end_atom_idx, end_stereo_atom_idx, 0)
-                    mol = safe_set_bond_dir(mol, end_another_atom_idx, end_stereo_atom_idx, 4)
-                else:
-                    raise ValueError
-                each_bond.SetStereoAtoms(begin_atom_idx, end_another_atom_idx)
-            elif begin_atom_rank > begin_another_atom_rank\
-                 and end_atom_rank < end_another_atom_rank:
-                # (end_atom_idx +) begin_another_atom_idx should be in StereoAtoms
-                if each_bond.GetStereo() == 2:
-                    # set stereo
-                    each_bond.SetStereo(Chem.rdchem.BondStereo.values[3])
-                    # set bond dir
-                    mol = safe_set_bond_dir(mol, begin_atom_idx, begin_stereo_atom_idx, 0)
-                    mol = safe_set_bond_dir(mol, begin_another_atom_idx, begin_stereo_atom_idx, 4)
-                    mol = safe_set_bond_dir(mol, end_atom_idx, end_stereo_atom_idx, 4)
-                    mol = safe_set_bond_dir(mol, end_another_atom_idx, end_stereo_atom_idx, 0)
-                elif each_bond.GetStereo() == 3:
-                    # set stereo
-                    each_bond.SetStereo(Chem.rdchem.BondStereo.values[2])
-                    # set bond dir
-                    mol = safe_set_bond_dir(mol, begin_atom_idx, begin_stereo_atom_idx, 0)
-                    mol = safe_set_bond_dir(mol, begin_another_atom_idx, begin_stereo_atom_idx, 4)
-                    mol = safe_set_bond_dir(mol, end_atom_idx, end_stereo_atom_idx, 3)
-                    mol = safe_set_bond_dir(mol, end_another_atom_idx, end_stereo_atom_idx, 0)
-                else:
-                    raise ValueError
-                each_bond.SetStereoAtoms(begin_another_atom_idx, end_atom_idx)
-            elif begin_atom_rank > begin_another_atom_rank\
-                 and end_atom_rank > end_another_atom_rank:
-                # begin_another_atom_idx + end_another_atom_idx should be in StereoAtoms
-                if each_bond.GetStereo() == 2:
-                    # set bond dir
-                    mol = safe_set_bond_dir(mol, begin_atom_idx, begin_stereo_atom_idx, 0)
-                    mol = safe_set_bond_dir(mol, begin_another_atom_idx, begin_stereo_atom_idx, 4)
-                    mol = safe_set_bond_dir(mol, end_atom_idx, end_stereo_atom_idx, 0)
-                    mol = safe_set_bond_dir(mol, end_another_atom_idx, end_stereo_atom_idx, 3)
-                elif each_bond.GetStereo() == 3:
-                    # set bond dir
-                    mol = safe_set_bond_dir(mol, begin_atom_idx, begin_stereo_atom_idx, 0)
-                    mol = safe_set_bond_dir(mol, begin_another_atom_idx, begin_stereo_atom_idx, 4)
-                    mol = safe_set_bond_dir(mol, end_atom_idx, end_stereo_atom_idx, 0)
-                    mol = safe_set_bond_dir(mol, end_another_atom_idx, end_stereo_atom_idx, 4)
-                else:
-                    raise ValueError
-                each_bond.SetStereoAtoms(begin_another_atom_idx, end_another_atom_idx)
-            else:
-                raise RuntimeError
-    return mol
-def set_stereo(mol):
-    '''
- 0: rdkit.Chem.rdchem.BondDir.NONE,
- 1: rdkit.Chem.rdchem.BondDir.BEGINWEDGE,
- 2: rdkit.Chem.rdchem.BondDir.BEGINDASH,
- 3: rdkit.Chem.rdchem.BondDir.ENDDOWNRIGHT,
- 4: rdkit.Chem.rdchem.BondDir.ENDUPRIGHT,
-    '''
-    _mol = Chem.MolFromSmiles(Chem.MolToSmiles(mol))
-    Chem.Kekulize(_mol, True)
-    substruct_match = mol.GetSubstructMatch(_mol)
-    if not substruct_match:
-        ''' mol and _mol are kekulized.
-        sometimes, the order of '=' and '-' changes, which causes mol and _mol not matched.
-        '''
-        Chem.SetAromaticity(mol)
-        Chem.SetAromaticity(_mol)
-        substruct_match = mol.GetSubstructMatch(_mol)
-    try:
-        atom_match = {substruct_match[_mol_atom_idx]: _mol_atom_idx for _mol_atom_idx in range(_mol.GetNumAtoms())} # mol to _mol
-    except:
-        raise ValueError('two molecules obtained from the same data do not match.')
-    for each_bond in mol.GetBonds():
-        begin_atom_idx = each_bond.GetBeginAtomIdx()
-        end_atom_idx = each_bond.GetEndAtomIdx()
-        _bond = _mol.GetBondBetweenAtoms(atom_match[begin_atom_idx], atom_match[end_atom_idx])
-        _bond.SetStereo(each_bond.GetStereo())
-    mol = _mol
-    for each_bond in mol.GetBonds():
-        if int(each_bond.GetStereo()) in [2, 3]: #2=Z (same side), 3=E
-            begin_stereo_atom_idx = each_bond.GetBeginAtomIdx()
-            end_stereo_atom_idx = each_bond.GetEndAtomIdx()
-            begin_atom_idx_set = set([each_neighbor.GetIdx()
-                                      for each_neighbor
-                                      in mol.GetAtomWithIdx(begin_stereo_atom_idx).GetNeighbors()
-                                      if each_neighbor.GetIdx() != end_stereo_atom_idx])
-            end_atom_idx_set = set([each_neighbor.GetIdx()
-                                    for each_neighbor
-                                    in mol.GetAtomWithIdx(end_stereo_atom_idx).GetNeighbors()
-                                    if each_neighbor.GetIdx() != begin_stereo_atom_idx])
-            if not begin_atom_idx_set:
-                each_bond.SetStereo(Chem.rdchem.BondStereo(0))
-                continue
-            if not end_atom_idx_set:
-                each_bond.SetStereo(Chem.rdchem.BondStereo(0))
-                continue
-            if len(begin_atom_idx_set) == 1:
-                begin_atom_idx = begin_atom_idx_set.pop()
-                begin_another_atom_idx = None
-            if len(end_atom_idx_set) == 1:
-                end_atom_idx = end_atom_idx_set.pop()
-                end_another_atom_idx = None
-            if len(begin_atom_idx_set) == 2:
-                atom_idx_1 = begin_atom_idx_set.pop()
-                atom_idx_2 = begin_atom_idx_set.pop()
-                if int(mol.GetAtomWithIdx(atom_idx_1).GetProp('_CIPRank')) < int(mol.GetAtomWithIdx(atom_idx_2).GetProp('_CIPRank')):
-                    begin_atom_idx = atom_idx_1
-                    begin_another_atom_idx = atom_idx_2
-                else:
-                    begin_atom_idx = atom_idx_2
-                    begin_another_atom_idx = atom_idx_1
-            if len(end_atom_idx_set) == 2:
-                atom_idx_1 = end_atom_idx_set.pop()
-                atom_idx_2 = end_atom_idx_set.pop()
-                if int(mol.GetAtomWithIdx(atom_idx_1).GetProp('_CIPRank')) < int(mol.GetAtomWithIdx(atom_idx_2).GetProp('_CIPRank')):
-                    end_atom_idx = atom_idx_1
-                    end_another_atom_idx = atom_idx_2
-                else:
-                    end_atom_idx = atom_idx_2
-                    end_another_atom_idx = atom_idx_1
-            if each_bond.GetStereo() == 2: # same side
-                mol = safe_set_bond_dir(mol, begin_atom_idx, begin_stereo_atom_idx, 3)
-                mol = safe_set_bond_dir(mol, end_atom_idx, end_stereo_atom_idx, 4)
-                each_bond.SetStereoAtoms(begin_atom_idx, end_atom_idx)
-            elif each_bond.GetStereo() == 3: # opposite side
-                mol = safe_set_bond_dir(mol, begin_atom_idx, begin_stereo_atom_idx, 3)
-                mol = safe_set_bond_dir(mol, end_atom_idx, end_stereo_atom_idx, 3)
-                each_bond.SetStereoAtoms(begin_atom_idx, end_atom_idx)
-            else:
-                raise ValueError
-    return mol
-def safe_set_bond_dir(mol, atom_idx_1, atom_idx_2, bond_dir_val):
-    if atom_idx_1 is None or atom_idx_2 is None:
-        return mol
-    else:
-        mol.GetBondBetweenAtoms(atom_idx_1, atom_idx_2).SetBondDir(Chem.rdchem.BondDir.values[bond_dir_val])
-        return mol

graph_grammar/nn/__init__.py DELETED Viewed

@@ -1,11 +0,0 @@
-# -*- coding:utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""

graph_grammar/nn/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (508 Bytes)

graph_grammar/nn/__pycache__/decoder.cpython-310.pyc DELETED Viewed

Binary file (3.98 kB)

graph_grammar/nn/__pycache__/encoder.cpython-310.pyc DELETED Viewed

Binary file (5.38 kB)

graph_grammar/nn/dataset.py DELETED Viewed

@@ -1,121 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Apr 18 2018"
-from torch.utils.data import Dataset, DataLoader
-import torch
-import numpy as np
-def left_padding(sentence_list, max_len, pad_idx=-1, inverse=False):
-    ''' pad left
-    Parameters
-    ----------
-    sentence_list : list of sequences of integers
-    max_len : int
-        maximum length of sentences.
-        if a sentence is shorter than `max_len`, its left part is padded.
-    pad_idx : int
-        integer for padding
-    inverse : bool
-        if True, the sequence is inversed.
-    Returns
-    -------
-    List of torch.LongTensor
-        each sentence is left-padded.
-    '''
-    max_in_list = max([len(each_sen) for each_sen in sentence_list])
-    if max_in_list > max_len:
-        raise ValueError('`max_len` should be larger than the maximum length of input sequences, {}.'.format(max_in_list))
-    if inverse:
-        return [torch.LongTensor([pad_idx] * (max_len - len(each_sen)) + each_sen[::-1]) for each_sen in sentence_list]
-    else:
-        return [torch.LongTensor([pad_idx] * (max_len - len(each_sen)) + each_sen) for each_sen in sentence_list]
-def right_padding(sentence_list, max_len, pad_idx=-1):
-    ''' pad right
-    Parameters
-    ----------
-    sentence_list : list of sequences of integers
-    max_len : int
-        maximum length of sentences.
-        if a sentence is shorter than `max_len`, its right part is padded.
-    pad_idx : int
-        integer for padding
-    Returns
-    -------
-    List of torch.LongTensor
-        each sentence is right-padded.
-    '''
-    max_in_list = max([len(each_sen) for each_sen in sentence_list])
-    if max_in_list > max_len:
-        raise ValueError('`max_len` should be larger than the maximum length of input sequences, {}.'.format(max_in_list))
-    return [torch.LongTensor(each_sen + [pad_idx] * (max_len - len(each_sen))) for each_sen in sentence_list]
-class HRGDataset(Dataset):
-    '''
-    A class of HRG data
-    '''
-    def __init__(self, hrg, prod_rule_seq_list, max_len, target_val_list=None, inversed_input=False):
-        self.hrg = hrg
-        self.left_prod_rule_seq_list = left_padding(prod_rule_seq_list,
-                                                    max_len,
-                                                    inverse=inversed_input)
-        self.right_prod_rule_seq_list = right_padding(prod_rule_seq_list, max_len)
-        self.inserved_input = inversed_input
-        self.target_val_list = target_val_list
-        if target_val_list is not None:
-            if len(prod_rule_seq_list) != len(target_val_list):
-                raise ValueError(f'prod_rule_seq_list and target_val_list have inconsistent lengths: {len(prod_rule_seq_list)}, {len(target_val_list)}')
-    def __len__(self):
-        return len(self.left_prod_rule_seq_list)
-    def __getitem__(self, idx):
-        if self.target_val_list is not None:
-            return self.left_prod_rule_seq_list[idx], self.right_prod_rule_seq_list[idx], np.float32(self.target_val_list[idx])
-        else:
-            return self.left_prod_rule_seq_list[idx], self.right_prod_rule_seq_list[idx]
-    @property
-    def vocab_size(self):
-        return self.hrg.num_prod_rule
-def batch_padding(each_batch, batch_size, padding_idx):
-    num_pad = batch_size - len(each_batch[0])
-    if num_pad:
-        each_batch[0] = torch.cat([each_batch[0],
-                                   padding_idx * torch.ones((batch_size - len(each_batch[0]),
-                                                             len(each_batch[0][0])), dtype=torch.int64)], dim=0)
-        each_batch[1] = torch.cat([each_batch[1],
-                                   padding_idx * torch.ones((batch_size - len(each_batch[1]),
-                                                             len(each_batch[1][0])), dtype=torch.int64)], dim=0)
-    return each_batch, num_pad

graph_grammar/nn/decoder.py DELETED Viewed

@@ -1,158 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Aug 9 2018"
-import abc
-import numpy as np
-import torch
-from torch import nn
-class DecoderBase(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.hidden_dict = {}
-    @abc.abstractmethod
-    def forward_one_step(self, tgt_emb_in):
-        ''' one-step forward model
-        Parameters
-        ----------
-        tgt_emb_in : Tensor, shape (batch_size, input_dim)
-        Returns
-        -------
-        Tensor, shape (batch_size, hidden_dim)
-        '''
-        tgt_emb_out = None
-        return tgt_emb_out
-    @abc.abstractmethod
-    def init_hidden(self):
-        ''' initialize the hidden states
-        '''
-        pass
-    @abc.abstractmethod
-    def feed_hidden(self, hidden_dict_0):
-        for each_hidden in self.hidden_dict.keys():
-            self.hidden_dict[each_hidden][0] = hidden_dict_0[each_hidden]
-class GRUDecoder(DecoderBase):
-    def __init__(self, input_dim: int, hidden_dim: int, num_layers: int,
-                 dropout: float, batch_size: int, use_gpu: bool,
-                 no_dropout=False):
-        super().__init__()
-        self.input_dim = input_dim
-        self.hidden_dim = hidden_dim
-        self.num_layers = num_layers
-        self.dropout = dropout
-        self.batch_size = batch_size
-        self.use_gpu = use_gpu
-        self.model = nn.GRU(input_size=self.input_dim,
-                            hidden_size=self.hidden_dim,
-                            num_layers=self.num_layers,
-                            batch_first=True,
-                            bidirectional=False,
-                            dropout=self.dropout if not no_dropout else 0
-        )
-        if self.use_gpu:
-            self.model.cuda()
-        self.init_hidden()
-    def init_hidden(self):
-        self.hidden_dict['h'] = torch.zeros((self.num_layers,
-                                             self.batch_size,
-                                             self.hidden_dim),
-                                            requires_grad=False)
-        if self.use_gpu:
-            self.hidden_dict['h'] = self.hidden_dict['h'].cuda()
-    def forward_one_step(self, tgt_emb_in):
-        ''' one-step forward model
-        Parameters
-        ----------
-        tgt_emb_in : Tensor, shape (batch_size, input_dim)
-        Returns
-        -------
-        Tensor, shape (batch_size, hidden_dim)
-        '''
-        tgt_emb_out, self.hidden_dict['h'] \
-            = self.model(tgt_emb_in.view(self.batch_size, 1, -1),
-                         self.hidden_dict['h'])
-        return tgt_emb_out
-class LSTMDecoder(DecoderBase):
-    def __init__(self, input_dim: int, hidden_dim: int, num_layers: int,
-                 dropout: float, batch_size: int, use_gpu: bool,
-                 no_dropout=False):
-        super().__init__()
-        self.input_dim = input_dim
-        self.hidden_dim = hidden_dim
-        self.num_layers = num_layers
-        self.dropout = dropout
-        self.batch_size = batch_size
-        self.use_gpu = use_gpu
-        self.model = nn.LSTM(input_size=self.input_dim,
-                             hidden_size=self.hidden_dim,
-                             num_layers=self.num_layers,
-                             batch_first=True,
-                             bidirectional=False,
-                             dropout=self.dropout if not no_dropout else 0)
-        if self.use_gpu:
-            self.model.cuda()
-        self.init_hidden()
-    def init_hidden(self):
-        self.hidden_dict['h'] = torch.zeros((self.num_layers,
-                                             self.batch_size,
-                                             self.hidden_dim),
-                                            requires_grad=False)
-        self.hidden_dict['c'] = torch.zeros((self.num_layers,
-                                             self.batch_size,
-                                             self.hidden_dim),
-                                            requires_grad=False)
-        if self.use_gpu:
-            for each_hidden in self.hidden_dict.keys():
-                self.hidden_dict[each_hidden] = self.hidden_dict[each_hidden].cuda()
-    def forward_one_step(self, tgt_emb_in):
-        ''' one-step forward model
-        Parameters
-        ----------
-        tgt_emb_in : Tensor, shape (batch_size, input_dim)
-        Returns
-        -------
-        Tensor, shape (batch_size, hidden_dim)
-        '''
-        tgt_hidden_out, self.hidden_dict['h'], self.hidden_dict['c'] \
-            = self.model(tgt_emb_in.view(self.batch_size, 1, -1),
-                         self.hidden_dict['h'], self.hidden_dict['c'])
-        return tgt_hidden_out

graph_grammar/nn/encoder.py DELETED Viewed

@@ -1,199 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Aug 9 2018"
-import abc
-import numpy as np
-import torch
-import torch.nn.functional as F
-from torch import nn
-from typing import List
-class EncoderBase(nn.Module):
-    def __init__(self):
-        super().__init__()
-    @abc.abstractmethod
-    def forward(self, in_seq):
-        ''' forward model
-        Parameters
-        ----------
-        in_seq_emb : Variable, shape (batch_size, max_len, input_dim)
-        Returns
-        -------
-        hidden_seq_emb : Tensor, shape (batch_size, max_len, 1 + bidirectional, hidden_dim)
-        '''
-        pass
-    @abc.abstractmethod
-    def init_hidden(self):
-        ''' initialize the hidden states
-        '''
-        pass
-class GRUEncoder(EncoderBase):
-    def __init__(self, input_dim: int, hidden_dim: int, num_layers: int,
-                 bidirectional: bool, dropout: float, batch_size: int, use_gpu: bool,
-                 no_dropout=False):
-        super().__init__()
-        self.input_dim = input_dim
-        self.hidden_dim = hidden_dim
-        self.num_layers = num_layers
-        self.bidirectional = bidirectional
-        self.dropout = dropout
-        self.batch_size = batch_size
-        self.use_gpu = use_gpu
-        self.model = nn.GRU(input_size=self.input_dim,
-                            hidden_size=self.hidden_dim,
-                            num_layers=self.num_layers,
-                            batch_first=True,
-                            bidirectional=self.bidirectional,
-                            dropout=self.dropout if not no_dropout else 0)
-        if self.use_gpu:
-            self.model.cuda()
-        self.init_hidden()
-    def init_hidden(self):
-        self.h0 = torch.zeros(((self.bidirectional + 1) * self.num_layers,
-                               self.batch_size,
-                               self.hidden_dim),
-                              requires_grad=False)
-        if self.use_gpu:
-            self.h0 = self.h0.cuda()
-    def forward(self, in_seq_emb):
-        ''' forward model
-        Parameters
-        ----------
-        in_seq_emb : Tensor, shape (batch_size, max_len, input_dim)
-        Returns
-        -------
-        hidden_seq_emb : Tensor, shape (batch_size, max_len, 1 + bidirectional, hidden_dim)
-        '''
-        max_len = in_seq_emb.size(1)
-        hidden_seq_emb, self.h0 = self.model(
-            in_seq_emb, self.h0)
-        hidden_seq_emb = hidden_seq_emb.view(self.batch_size,
-                                             max_len,
-                                             1 + self.bidirectional,
-                                             self.hidden_dim)
-        return hidden_seq_emb
-class LSTMEncoder(EncoderBase):
-    def __init__(self, input_dim: int, hidden_dim: int, num_layers: int,
-                 bidirectional: bool, dropout: float, batch_size: int, use_gpu: bool,
-                 no_dropout=False):
-        super().__init__()
-        self.input_dim = input_dim
-        self.hidden_dim = hidden_dim
-        self.num_layers = num_layers
-        self.bidirectional = bidirectional
-        self.dropout = dropout
-        self.batch_size = batch_size
-        self.use_gpu = use_gpu
-        self.model = nn.LSTM(input_size=self.input_dim,
-                             hidden_size=self.hidden_dim,
-                             num_layers=self.num_layers,
-                             batch_first=True,
-                             bidirectional=self.bidirectional,
-                             dropout=self.dropout if not no_dropout else 0)
-        if self.use_gpu:
-            self.model.cuda()
-        self.init_hidden()
-    def init_hidden(self):
-        self.h0 = torch.zeros(((self.bidirectional + 1) * self.num_layers,
-                               self.batch_size,
-                               self.hidden_dim),
-                              requires_grad=False)
-        self.c0 = torch.zeros(((self.bidirectional + 1) * self.num_layers,
-                               self.batch_size,
-                               self.hidden_dim),
-                              requires_grad=False)
-        if self.use_gpu:
-            self.h0 = self.h0.cuda()
-            self.c0 = self.c0.cuda()
-    def forward(self, in_seq_emb):
-        ''' forward model
-        Parameters
-        ----------
-        in_seq_emb : Tensor, shape (batch_size, max_len, input_dim)
-        Returns
-        -------
-        hidden_seq_emb : Tensor, shape (batch_size, max_len, 1 + bidirectional, hidden_dim)
-        '''
-        max_len = in_seq_emb.size(1)
-        hidden_seq_emb, (self.h0, self.c0) = self.model(
-            in_seq_emb, (self.h0, self.c0))
-        hidden_seq_emb = hidden_seq_emb.view(self.batch_size,
-                                             max_len,
-                                             1 + self.bidirectional,
-                                             self.hidden_dim)
-        return hidden_seq_emb
-class FullConnectedEncoder(EncoderBase):
-    def __init__(self, input_dim: int, hidden_dim: int, max_len: int, hidden_dim_list: List[int],
-                 batch_size: int, use_gpu: bool):
-        super().__init__()
-        self.input_dim = input_dim
-        self.hidden_dim = hidden_dim
-        self.max_len = max_len
-        self.hidden_dim_list = hidden_dim_list
-        self.use_gpu = use_gpu
-        in_out_dim_list = [input_dim * max_len] + list(hidden_dim_list) + [hidden_dim]
-        self.linear_list = nn.ModuleList(
-            [nn.Linear(in_out_dim_list[each_idx], in_out_dim_list[each_idx + 1])\
-             for each_idx in range(len(in_out_dim_list) - 1)])
-    def forward(self, in_seq_emb):
-        ''' forward model
-        Parameters
-        ----------
-        in_seq_emb : Tensor, shape (batch_size, max_len, input_dim)
-        Returns
-        -------
-        hidden_seq_emb : Tensor, shape (batch_size, max_len, 1 + bidirectional, hidden_dim)
-        '''
-        batch_size = in_seq_emb.size(0)
-        x = in_seq_emb.view(batch_size, -1)
-        for each_linear in self.linear_list:
-            x = F.relu(each_linear(x))
-        return x.view(batch_size, 1, -1)
-    def init_hidden(self):
-        pass

graph_grammar/nn/graph.py DELETED Viewed

@@ -1,313 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES THE ORIGINAL SOURCE CODE (AND SOME ADAPTATIONS)
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-""" Title """
-__author__ = "Hiroshi Kajino <KAJINO@jp.ibm.com>"
-__copyright__ = "(c) Copyright IBM Corp. 2018"
-__version__ = "0.1"
-__date__ = "Jan 1 2018"
-import numpy as np
-import torch
-import torch.nn.functional as F
-from graph_grammar.graph_grammar.hrg import ProductionRuleCorpus
-from torch import nn
-from torch.autograd import Variable
-class MolecularProdRuleEmbedding(nn.Module):
-    ''' molecular fingerprint layer
-    '''
-    def __init__(self, prod_rule_corpus, layer2layer_activation, layer2out_activation,
-                 out_dim=32, element_embed_dim=32,
-                 num_layers=3, padding_idx=None, use_gpu=False):
-        super().__init__()
-        if padding_idx is not None:
-            assert padding_idx == -1, 'padding_idx must be -1.'
-        self.prod_rule_corpus = prod_rule_corpus
-        self.layer2layer_activation = layer2layer_activation
-        self.layer2out_activation = layer2out_activation
-        self.out_dim = out_dim
-        self.element_embed_dim = element_embed_dim
-        self.num_layers = num_layers
-        self.padding_idx = padding_idx
-        self.use_gpu = use_gpu
-        self.layer2layer_list = []
-        self.layer2out_list = []
-        if self.use_gpu:
-            self.atom_embed = torch.randn(self.prod_rule_corpus.num_edge_symbol,
-                                          self.element_embed_dim, requires_grad=True).cuda()
-            self.bond_embed = torch.randn(self.prod_rule_corpus.num_node_symbol,
-                                          self.element_embed_dim, requires_grad=True).cuda()
-            self.ext_id_embed = torch.randn(self.prod_rule_corpus.num_ext_id,
-                                            self.element_embed_dim, requires_grad=True).cuda()
-            for _ in range(num_layers):
-                self.layer2layer_list.append(nn.Linear(self.element_embed_dim, self.element_embed_dim).cuda())
-                self.layer2out_list.append(nn.Linear(self.element_embed_dim, self.out_dim).cuda())
-        else:
-            self.atom_embed = torch.randn(self.prod_rule_corpus.num_edge_symbol,
-                                          self.element_embed_dim, requires_grad=True)
-            self.bond_embed = torch.randn(self.prod_rule_corpus.num_node_symbol,
-                                          self.element_embed_dim, requires_grad=True)
-            self.ext_id_embed = torch.randn(self.prod_rule_corpus.num_ext_id,
-                                            self.element_embed_dim, requires_grad=True)
-            for _ in range(num_layers):
-                self.layer2layer_list.append(nn.Linear(self.element_embed_dim, self.element_embed_dim))
-                self.layer2out_list.append(nn.Linear(self.element_embed_dim, self.out_dim))
-    def forward(self, prod_rule_idx_seq):
-        ''' forward model for mini-batch
-        Parameters
-        ----------
-        prod_rule_idx_seq : (batch_size, length)
-        Returns
-        -------
-        Variable, shape (batch_size, length, out_dim)
-        '''
-        batch_size, length = prod_rule_idx_seq.shape
-        if self.use_gpu:
-            out = Variable(torch.zeros((batch_size, length, self.out_dim))).cuda()
-        else:
-            out = Variable(torch.zeros((batch_size, length, self.out_dim)))
-        for each_batch_idx in range(batch_size):
-            for each_idx in range(length):
-                if int(prod_rule_idx_seq[each_batch_idx, each_idx]) == len(self.prod_rule_corpus.prod_rule_list):
-                    continue
-                else:
-                    each_prod_rule = self.prod_rule_corpus.prod_rule_list[int(prod_rule_idx_seq[each_batch_idx, each_idx])]
-                    layer_wise_embed_dict = {each_edge: self.atom_embed[
-                        each_prod_rule.rhs.edge_attr(each_edge)['symbol_idx']]
-                                             for each_edge in each_prod_rule.rhs.edges}
-                    layer_wise_embed_dict.update({each_node: self.bond_embed[
-                        each_prod_rule.rhs.node_attr(each_node)['symbol_idx']]
-                                                  for each_node in each_prod_rule.rhs.nodes})
-                    for each_node in each_prod_rule.rhs.nodes:
-                        if 'ext_id' in each_prod_rule.rhs.node_attr(each_node):
-                            layer_wise_embed_dict[each_node] \
-                                = layer_wise_embed_dict[each_node] \
-                                + self.ext_id_embed[each_prod_rule.rhs.node_attr(each_node)['ext_id']]
-                    for each_layer in range(self.num_layers):
-                        next_layer_embed_dict = {}
-                        for each_edge in each_prod_rule.rhs.edges:
-                            v = layer_wise_embed_dict[each_edge]
-                            for each_node in each_prod_rule.rhs.nodes_in_edge(each_edge):
-                                v = v + layer_wise_embed_dict[each_node]
-                            next_layer_embed_dict[each_edge] = self.layer2layer_activation(self.layer2layer_list[each_layer](v))
-                            out[each_batch_idx, each_idx, :] \
-                                = out[each_batch_idx, each_idx, :] + self.layer2out_activation(self.layer2out_list[each_layer](v))
-                        for each_node in each_prod_rule.rhs.nodes:
-                            v = layer_wise_embed_dict[each_node]
-                            for each_edge in each_prod_rule.rhs.adj_edges(each_node):
-                                v = v + layer_wise_embed_dict[each_edge]
-                            next_layer_embed_dict[each_node] = self.layer2layer_activation(self.layer2layer_list[each_layer](v))
-                            out[each_batch_idx, each_idx, :]\
-                                = out[each_batch_idx, each_idx, :] + self.layer2out_activation(self.layer2out_list[each_layer](v))
-                        layer_wise_embed_dict = next_layer_embed_dict
-        return out
-class MolecularProdRuleEmbeddingLastLayer(nn.Module):
-    ''' molecular fingerprint layer
-    '''
-    def __init__(self, prod_rule_corpus, layer2layer_activation, layer2out_activation,
-                 out_dim=32, element_embed_dim=32,
-                 num_layers=3, padding_idx=None, use_gpu=False):
-        super().__init__()
-        if padding_idx is not None:
-            assert padding_idx == -1, 'padding_idx must be -1.'
-        self.prod_rule_corpus = prod_rule_corpus
-        self.layer2layer_activation = layer2layer_activation
-        self.layer2out_activation = layer2out_activation
-        self.out_dim = out_dim
-        self.element_embed_dim = element_embed_dim
-        self.num_layers = num_layers
-        self.padding_idx = padding_idx
-        self.use_gpu = use_gpu
-        self.layer2layer_list = []
-        self.layer2out_list = []
-        if self.use_gpu:
-            self.atom_embed = nn.Embedding(self.prod_rule_corpus.num_edge_symbol, self.element_embed_dim).cuda()
-            self.bond_embed = nn.Embedding(self.prod_rule_corpus.num_node_symbol, self.element_embed_dim).cuda()
-            for _ in range(num_layers+1):
-                self.layer2layer_list.append(nn.Linear(self.element_embed_dim, self.element_embed_dim).cuda())
-                self.layer2out_list.append(nn.Linear(self.element_embed_dim, self.out_dim).cuda())
-        else:
-            self.atom_embed = nn.Embedding(self.prod_rule_corpus.num_edge_symbol, self.element_embed_dim)
-            self.bond_embed = nn.Embedding(self.prod_rule_corpus.num_node_symbol, self.element_embed_dim)
-            for _ in range(num_layers+1):
-                self.layer2layer_list.append(nn.Linear(self.element_embed_dim, self.element_embed_dim))
-                self.layer2out_list.append(nn.Linear(self.element_embed_dim, self.out_dim))
-    def forward(self, prod_rule_idx_seq):
-        ''' forward model for mini-batch
-        Parameters
-        ----------
-        prod_rule_idx_seq : (batch_size, length)
-        Returns
-        -------
-        Variable, shape (batch_size, length, out_dim)
-        '''
-        batch_size, length = prod_rule_idx_seq.shape
-        if self.use_gpu:
-            out = Variable(torch.zeros((batch_size, length, self.out_dim))).cuda()
-        else:
-            out = Variable(torch.zeros((batch_size, length, self.out_dim)))
-        for each_batch_idx in range(batch_size):
-            for each_idx in range(length):
-                if int(prod_rule_idx_seq[each_batch_idx, each_idx]) == len(self.prod_rule_corpus.prod_rule_list):
-                    continue
-                else:
-                    each_prod_rule = self.prod_rule_corpus.prod_rule_list[int(prod_rule_idx_seq[each_batch_idx, each_idx])]
-                    if self.use_gpu:
-                        layer_wise_embed_dict = {each_edge: self.atom_embed(
-                            Variable(torch.LongTensor(
-                                [each_prod_rule.rhs.edge_attr(each_edge)['symbol_idx']]
-                            ), requires_grad=False).cuda())
-                                                 for each_edge in each_prod_rule.rhs.edges}
-                        layer_wise_embed_dict.update({each_node: self.bond_embed(
-                                                         Variable(
-                                                             torch.LongTensor([
-                                                                     each_prod_rule.rhs.node_attr(each_node)['symbol_idx']]),
-                                                             requires_grad=False).cuda()
-                                                     ) for each_node in each_prod_rule.rhs.nodes})
-                    else:
-                        layer_wise_embed_dict = {each_edge: self.atom_embed(
-                            Variable(torch.LongTensor(
-                                [each_prod_rule.rhs.edge_attr(each_edge)['symbol_idx']]
-                            ), requires_grad=False))
-                                                 for each_edge in each_prod_rule.rhs.edges}
-                        layer_wise_embed_dict.update({each_node: self.bond_embed(
-                                                         Variable(
-                                                             torch.LongTensor([
-                                                                     each_prod_rule.rhs.node_attr(each_node)['symbol_idx']]),
-                                                             requires_grad=False)
-                                                     ) for each_node in each_prod_rule.rhs.nodes})
-                    for each_layer in range(self.num_layers):
-                        next_layer_embed_dict = {}
-                        for each_edge in each_prod_rule.rhs.edges:
-                            v = layer_wise_embed_dict[each_edge]
-                            for each_node in each_prod_rule.rhs.nodes_in_edge(each_edge):
-                                v += layer_wise_embed_dict[each_node]
-                            next_layer_embed_dict[each_edge] = self.layer2layer_activation(self.layer2layer_list[each_layer](v))
-                        for each_node in each_prod_rule.rhs.nodes:
-                            v = layer_wise_embed_dict[each_node]
-                            for each_edge in each_prod_rule.rhs.adj_edges(each_node):
-                                v += layer_wise_embed_dict[each_edge]
-                            next_layer_embed_dict[each_node] = self.layer2layer_activation(self.layer2layer_list[each_layer](v))
-                        layer_wise_embed_dict = next_layer_embed_dict
-                    for each_edge in each_prod_rule.rhs.edges:
-                        out[each_batch_idx, each_idx, :] = self.layer2out_activation(self.layer2out_list[self.num_layers](v))
-                    for each_edge in each_prod_rule.rhs.edges:
-                        out[each_batch_idx, each_idx, :] = self.layer2out_activation(self.layer2out_list[self.num_layers](v))
-        return out
-class MolecularProdRuleEmbeddingUsingFeatures(nn.Module):
-    ''' molecular fingerprint layer
-    '''
-    def __init__(self, prod_rule_corpus, layer2layer_activation, layer2out_activation,
-                 out_dim=32, num_layers=3, padding_idx=None, use_gpu=False):
-        super().__init__()
-        if padding_idx is not None:
-            assert padding_idx == -1, 'padding_idx must be -1.'
-        self.feature_dict, self.feature_dim = prod_rule_corpus.construct_feature_vectors()
-        self.prod_rule_corpus = prod_rule_corpus
-        self.layer2layer_activation = layer2layer_activation
-        self.layer2out_activation = layer2out_activation
-        self.out_dim = out_dim
-        self.num_layers = num_layers
-        self.padding_idx = padding_idx
-        self.use_gpu = use_gpu
-        self.layer2layer_list = []
-        self.layer2out_list = []
-        if self.use_gpu:
-            for each_key in self.feature_dict:
-                self.feature_dict[each_key] = self.feature_dict[each_key].to_dense().cuda()
-            for _ in range(num_layers):
-                self.layer2layer_list.append(nn.Linear(self.feature_dim, self.feature_dim).cuda())
-                self.layer2out_list.append(nn.Linear(self.feature_dim, self.out_dim).cuda())
-        else:
-            for _ in range(num_layers):
-                self.layer2layer_list.append(nn.Linear(self.feature_dim, self.feature_dim))
-                self.layer2out_list.append(nn.Linear(self.feature_dim, self.out_dim))
-    def forward(self, prod_rule_idx_seq):
-        ''' forward model for mini-batch
-        Parameters
-        ----------
-        prod_rule_idx_seq : (batch_size, length)
-        Returns
-        -------
-        Variable, shape (batch_size, length, out_dim)
-        '''
-        batch_size, length = prod_rule_idx_seq.shape
-        if self.use_gpu:
-            out = Variable(torch.zeros((batch_size, length, self.out_dim))).cuda()
-        else:
-            out = Variable(torch.zeros((batch_size, length, self.out_dim)))
-        for each_batch_idx in range(batch_size):
-            for each_idx in range(length):
-                if int(prod_rule_idx_seq[each_batch_idx, each_idx]) == len(self.prod_rule_corpus.prod_rule_list):
-                    continue
-                else:
-                    each_prod_rule = self.prod_rule_corpus.prod_rule_list[int(prod_rule_idx_seq[each_batch_idx, each_idx])]
-                    edge_list = sorted(list(each_prod_rule.rhs.edges))
-                    node_list = sorted(list(each_prod_rule.rhs.nodes))
-                    adj_mat = torch.FloatTensor(each_prod_rule.rhs_adj_mat(edge_list + node_list).todense() + np.identity(len(edge_list)+len(node_list)))
-                    if self.use_gpu:
-                        adj_mat = adj_mat.cuda()
-                    layer_wise_embed = [
-                        self.feature_dict[each_prod_rule.rhs.edge_attr(each_edge)['symbol']]
-                        for each_edge in edge_list]\
-                            + [self.feature_dict[each_prod_rule.rhs.node_attr(each_node)['symbol']]
-                               for each_node in node_list]
-                    for each_node in each_prod_rule.ext_node.values():
-                        layer_wise_embed[each_prod_rule.rhs.num_edges + node_list.index(each_node)] \
-                                = layer_wise_embed[each_prod_rule.rhs.num_edges + node_list.index(each_node)] \
-                                + self.feature_dict[('ext_id', each_prod_rule.rhs.node_attr(each_node)['ext_id'])]
-                    layer_wise_embed = torch.stack(layer_wise_embed)
-                    for each_layer in range(self.num_layers):
-                        message = adj_mat @ layer_wise_embed
-                        next_layer_embed = self.layer2layer_activation(self.layer2layer_list[each_layer](message))
-                        out[each_batch_idx, each_idx, :] \
-                                = out[each_batch_idx, each_idx, :] \
-                                + self.layer2out_activation(self.layer2out_list[each_layer](message)).sum(dim=0)
-                        layer_wise_embed = next_layer_embed
-        return out

load.py DELETED Viewed

@@ -1,83 +0,0 @@
-# -*- coding:utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-import os
-import pickle
-import sys
-from rdkit import Chem
-import torch
-from torch_geometric.utils.smiles import from_smiles
-from typing import Any, Dict, List, Optional, Union
-from typing_extensions import Self
-from .graph_grammar.io.smi import hg_to_mol
-from .models.mhgvae import GrammarGINVAE
-class PretrainedModelWrapper:
-    model: GrammarGINVAE
-    def __init__(self, model_dict: Dict[str, Any]) -> None:
-        json_params = model_dict['gnn_params']
-        encoder_params = json_params['encoder_params']
-        encoder_params['node_feature_size'] = model_dict['num_features']
-        encoder_params['edge_feature_size'] = model_dict['num_edge_features']
-        self.model = GrammarGINVAE(model_dict['hrg'], rank=-1, encoder_params=encoder_params,
-                                   decoder_params=json_params['decoder_params'],
-                                   prod_rule_embed_params=json_params["prod_rule_embed_params"],
-                                   batch_size=512, max_len=model_dict['max_length'])
-        self.model.load_state_dict(model_dict['model_state_dict'])
-        self.model.eval()
-    def to(self, device: Union[str, int, torch.device]) -> Self:
-        dev_type = type(device)
-        if dev_type != torch.device:
-            if dev_type == str or torch.cuda.is_available():
-                device = torch.device(device)
-            else:
-                device = torch.device("mps", device)
-        self.model = self.model.to(device)
-        return self
-    def encode(self, data: List[str]) -> List[torch.tensor]:
-        # Need to encode them into a graph nn
-        output = []
-        for d in data:
-            params = next(self.model.parameters())
-            g = from_smiles(d)
-            if (g.cpu() and params != 'cpu') or (not g.cpu() and params == 'cpu'):
-                g.to(params.device)
-            ltvec = self.model.graph_embed(g.x, g.edge_index, g.edge_attr, g.batch)
-            output.append(ltvec[0])
-        return output
-    def decode(self, data: List[torch.tensor]) -> List[str]:
-        output = []
-        for d in data:
-            mu, logvar = self.model.get_mean_var(d.unsqueeze(0))
-            z = self.model.reparameterize(mu, logvar)
-            flags, _, hgs = self.model.decode(z)
-            if flags[0]:
-                reconstructed_mol, _ = hg_to_mol(hgs[0], True)
-                output.append(Chem.MolToSmiles(reconstructed_mol))
-            else:
-                output.append(None)
-        return output
-def load(model_name: str = "models/mhg_model/pickles/mhggnn_pretrained_model_0724_2023.pickle") -> Optional[
-    PretrainedModelWrapper]:
-    for p in sys.path:
-        file = p + "/" + model_name
-        if os.path.isfile(file):
-            with open(file, "rb") as f:
-                model_dict = pickle.load(f)
-                return PretrainedModelWrapper(model_dict)
-    return None

mhg_gnn.egg-info/PKG-INFO DELETED Viewed

@@ -1,102 +0,0 @@
-Metadata-Version: 2.1
-Name: mhg-gnn
-Version: 0.0
-Summary: Package for mhg-gnn
-Author: team
-License: TBD
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
-Description-Content-Type: text/markdown
-Requires-Dist: networkx>=2.8
-Requires-Dist: numpy<2.0.0,>=1.23.5
-Requires-Dist: pandas>=1.5.3
-Requires-Dist: rdkit-pypi<2023.9.6,>=2022.9.4
-Requires-Dist: torch>=2.0.0
-Requires-Dist: torchinfo>=1.8.0
-Requires-Dist: torch-geometric>=2.3.1
-# mhg-gnn
-This repository provides PyTorch source code assosiated with our publication, "MHG-GNN: Combination of Molecular Hypergraph Grammar with Graph Neural Network"
-**Paper:** [Arxiv Link](https://arxiv.org/pdf/2309.16374)
-For more information contact: SEIJITKD@jp.ibm.com
-![mhg-gnn](images/mhg_example1.png)
-## Introduction
-We present MHG-GNN, an autoencoder architecture
-that has an encoder based on GNN and a decoder based on a sequential model with MHG.
-Since the encoder is a GNN variant, MHG-GNN can accept any molecule as input, and
-demonstrate high predictive performance on molecular graph data.
-In addition, the decoder inherits the theoretical guarantee of MHG on always generating a structurally valid molecule as output.
-## Table of Contents
-1. [Getting Started](#getting-started)
-    1. [Pretrained Models and Training Logs](#pretrained-models-and-training-logs)
-    2. [Replicating Conda Environment](#replicating-conda-environment)
-2. [Feature Extraction](#feature-extraction)
-## Getting Started
-**This code and environment have been tested on Intel E5-2667 CPUs at 3.30GHz and NVIDIA A100 Tensor Core GPUs.**
-### Pretrained Models and Training Logs
-We provide checkpoints of the MHG-GNN model pre-trained on a dataset of ~1.34M molecules curated from PubChem. (later) For model weights: [HuggingFace Link]()
-Add the MHG-GNN `pre-trained weights.pt` to the `models/` directory according to your needs.
-### Replacicating Conda Environment
-Follow these steps to replicate our Conda environment and install the necessary libraries:
-```
-conda create --name mhg-gnn-env python=3.8.18
-conda activate mhg-gnn-env
-```
-#### Install Packages with Conda
-```
-conda install -c conda-forge networkx=2.8
-conda install numpy=1.23.5
-# conda install -c conda-forge rdkit=2022.9.4
-conda install pytorch=2.0.0 torchvision torchaudio -c pytorch
-conda install -c conda-forge torchinfo=1.8.0
-conda install pyg -c pyg
-```
-#### Install Packages with pip
-```
-pip install rdkit torch-nl==0.3 torch-scatter torch-sparse
-```
-## Feature Extraction
-The example notebook [mhg-gnn_encoder_decoder_example.ipynb](notebooks/mhg-gnn_encoder_decoder_example.ipynb) contains code to load checkpoint files and use the pre-trained model for encoder and decoder tasks.
-To load mhg-gnn, you can simply use:
-```python
-import torch
-import load
-model = load.load()
-```
-To encode SMILES into embeddings, you can use:
-```python
-with torch.no_grad():
-    repr = model.encode(["CCO", "O=C=O", "OC(=O)c1ccccc1C(=O)O"])
-```
-For decoder, you can use the function, so you can return from embeddings to SMILES strings:
-```python
-orig = model.decode(repr)
-```

mhg_gnn.egg-info/SOURCES.txt DELETED Viewed

@@ -1,46 +0,0 @@
-README.md
-setup.cfg
-setup.py
-./graph_grammar/__init__.py
-./graph_grammar/hypergraph.py
-./graph_grammar/algo/__init__.py
-./graph_grammar/algo/tree_decomposition.py
-./graph_grammar/graph_grammar/__init__.py
-./graph_grammar/graph_grammar/base.py
-./graph_grammar/graph_grammar/corpus.py
-./graph_grammar/graph_grammar/hrg.py
-./graph_grammar/graph_grammar/symbols.py
-./graph_grammar/graph_grammar/utils.py
-./graph_grammar/io/__init__.py
-./graph_grammar/io/smi.py
-./graph_grammar/nn/__init__.py
-./graph_grammar/nn/dataset.py
-./graph_grammar/nn/decoder.py
-./graph_grammar/nn/encoder.py
-./graph_grammar/nn/graph.py
-./models/__init__.py
-./models/mhgvae.py
-graph_grammar/__init__.py
-graph_grammar/hypergraph.py
-graph_grammar/algo/__init__.py
-graph_grammar/algo/tree_decomposition.py
-graph_grammar/graph_grammar/__init__.py
-graph_grammar/graph_grammar/base.py
-graph_grammar/graph_grammar/corpus.py
-graph_grammar/graph_grammar/hrg.py
-graph_grammar/graph_grammar/symbols.py
-graph_grammar/graph_grammar/utils.py
-graph_grammar/io/__init__.py
-graph_grammar/io/smi.py
-graph_grammar/nn/__init__.py
-graph_grammar/nn/dataset.py
-graph_grammar/nn/decoder.py
-graph_grammar/nn/encoder.py
-graph_grammar/nn/graph.py
-mhg_gnn.egg-info/PKG-INFO
-mhg_gnn.egg-info/SOURCES.txt
-mhg_gnn.egg-info/dependency_links.txt
-mhg_gnn.egg-info/requires.txt
-mhg_gnn.egg-info/top_level.txt
-models/__init__.py
-models/mhgvae.py

mhg_gnn.egg-info/dependency_links.txt DELETED Viewed

	@@ -1 +0,0 @@
1	-

mhg_gnn.egg-info/requires.txt DELETED Viewed

@@ -1,7 +0,0 @@
-networkx>=2.8
-numpy<2.0.0,>=1.23.5
-pandas>=1.5.3
-rdkit-pypi<2023.9.6,>=2022.9.4
-torch>=2.0.0
-torchinfo>=1.8.0
-torch-geometric>=2.3.1

mhg_gnn.egg-info/top_level.txt DELETED Viewed

	@@ -1,2 +0,0 @@
1	- graph_grammar
2	- models

pickles/mhggnn_pretrained_model_0724_2023.pickle → mhggnn_pretrained_model_0724_2023.pickle RENAMED Viewed

File without changes

models/__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-# -*- coding:utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#

models/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (221 Bytes)

models/__pycache__/mhgvae.cpython-310.pyc DELETED Viewed

Binary file (24.8 kB)

models/mhgvae.py DELETED Viewed

@@ -1,956 +0,0 @@
-# -*- coding:utf-8 -*-
-# Rhizome
-# Version beta 0.0, August 2023
-# Property of IBM Research, Accelerated Discovery
-#
-"""
-PLEASE NOTE THIS IMPLEMENTATION INCLUDES ADAPTED SOURCE CODE
-OF THE MHG IMPLEMENTATION OF HIROSHI KAJINO AT IBM TRL ALREADY PUBLICLY AVAILABLE,
-E.G., GRUEncoder/GRUDecoder, GrammarSeq2SeqVAE AND EVEN SOME METHODS OF GrammarGINVAE.
-THIS MIGHT INFLUENCE THE DECISION OF THE FINAL LICENSE SO CAREFUL CHECK NEEDS BE DONE.
-"""
-import numpy as np
-import logging
-import torch
-from torch.autograd import Variable
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.nn.modules.loss import _Loss
-from torch_geometric.nn import MessagePassing
-from torch_geometric.nn import global_add_pool
-from ..graph_grammar.graph_grammar.symbols import NTSymbol
-from ..graph_grammar.nn.encoder import EncoderBase
-from ..graph_grammar.nn.decoder import DecoderBase
-def get_atom_edge_feature_dims():
-    from torch_geometric.utils.smiles import x_map, e_map
-    func = lambda x: len(x[1])
-    return list(map(func, x_map.items())), list(map(func, e_map.items()))
-class FeatureEmbedding(nn.Module):
-    def __init__(self, input_dims, embedded_dim):
-        super().__init__()
-        self.embedding_list = nn.ModuleList()
-        for dim in input_dims:
-            embedding = nn.Embedding(dim, embedded_dim)
-            self.embedding_list.append(embedding)
-    def forward(self, x):
-        output = 0
-        for i in range(x.shape[1]):
-            input = x[:, i].to(torch.int)
-            device = next(self.parameters()).device
-            if device != input.device:
-                input = input.to(device)
-            emb = self.embedding_list[i](input)
-            output += emb
-        return output
-class GRUEncoder(EncoderBase):
-    def __init__(self, input_dim: int, hidden_dim: int, num_layers: int,
-                 bidirectional: bool, dropout: float, batch_size: int, rank: int=-1,
-                 no_dropout: bool=False):
-        super().__init__()
-        self.input_dim = input_dim
-        self.hidden_dim = hidden_dim
-        self.num_layers = num_layers
-        self.bidirectional = bidirectional
-        self.dropout = dropout
-        self.batch_size = batch_size
-        self.rank = rank
-        self.model = nn.GRU(input_size=self.input_dim,
-                            hidden_size=self.hidden_dim,
-                            num_layers=self.num_layers,
-                            batch_first=True,
-                            bidirectional=self.bidirectional,
-                            dropout=self.dropout if not no_dropout else 0)
-        if self.rank >= 0:
-            if torch.cuda.is_available():
-                self.model = self.model.to(rank)
-            else:
-                # support mac mps
-                self.model = self.model.to(torch.device("mps", rank))
-        self.init_hidden(self.batch_size)
-    def init_hidden(self, bsize):
-        self.h0 = torch.zeros(((self.bidirectional + 1) * self.num_layers,
-                               min(self.batch_size, bsize),
-                               self.hidden_dim),
-                              requires_grad=False)
-        if self.rank >= 0:
-            if torch.cuda.is_available():
-                self.h0 = self.h0.to(self.rank)
-            else:
-                # support mac mps
-                self.h0 = self.h0.to(torch.device("mps", self.rank))
-    def to(self, device):
-        newself = super().to(device)
-        newself.model = newself.model.to(device)
-        newself.h0 = newself.h0.to(device)
-        newself.rank = next(newself.parameters()).get_device()
-        return newself
-    def forward(self, in_seq_emb):
-        ''' forward model
-        Parameters
-        ----------
-        in_seq_emb : Tensor, shape (batch_size, max_len, input_dim)
-        Returns
-        -------
-        hidden_seq_emb : Tensor, shape (batch_size, max_len, 1 + bidirectional, hidden_dim)
-        '''
-        # Kishi: I think original MHG had this init_hidden()
-        self.init_hidden(in_seq_emb.size(0))
-        max_len = in_seq_emb.size(1)
-        hidden_seq_emb, self.h0 = self.model(
-            in_seq_emb, self.h0)
-        # As shown as returns, convert hidden_seq_emb: (batch_size, seq_len, (1 or 2) * hidden_size) -->
-        # (batch_size, seq_len, 1 or 2, hidden_size)
-        # In the original input the original GRU/LSTM with bidirectional encoding
-        # has contactinated tensors
-        # (first half for forward RNN, latter half for backward RNN)
-        # so convert them in a more friendly format packed for each RNN
-        hidden_seq_emb = hidden_seq_emb.view(-1,
-                                             max_len,
-                                             1 + self.bidirectional,
-                                             self.hidden_dim)
-        return hidden_seq_emb
-class GRUDecoder(DecoderBase):
-    def __init__(self, input_dim: int, hidden_dim: int, num_layers: int,
-                 dropout: float, batch_size: int, rank: int=-1,
-                 no_dropout: bool=False):
-        super().__init__()
-        self.input_dim = input_dim
-        self.hidden_dim = hidden_dim
-        self.num_layers = num_layers
-        self.dropout = dropout
-        self.batch_size = batch_size
-        self.rank = rank
-        self.model = nn.GRU(input_size=self.input_dim,
-                            hidden_size=self.hidden_dim,
-                            num_layers=self.num_layers,
-                            batch_first=True,
-                            bidirectional=False,
-                            dropout=self.dropout if not no_dropout else 0
-        )
-        if self.rank >= 0:
-            if torch.cuda.is_available():
-                self.model = self.model.to(self.rank)
-            else:
-                # support mac mps
-                self.model = self.model.to(torch.device("mps", self.rank))
-        self.init_hidden(self.batch_size)
-    def init_hidden(self, bsize):
-        self.hidden_dict['h'] = torch.zeros((self.num_layers,
-                                             min(self.batch_size, bsize),
-                                             self.hidden_dim),
-                                            requires_grad=False)
-        if self.rank >= 0:
-            if torch.cuda.is_available():
-                self.hidden_dict['h'] = self.hidden_dict['h'].to(self.rank)
-            else:
-                self.hidden_dict['h'] = self.hidden_dict['h'].to(torch.device("mps", self.rank))
-    def to(self, device):
-        newself = super().to(device)
-        newself.model = newself.model.to(device)
-        for k in self.hidden_dict.keys():
-            newself.hidden_dict[k] = newself.hidden_dict[k].to(device)
-        newself.rank = next(newself.parameters()).get_device()
-        return newself
-    def forward_one_step(self, tgt_emb_in):
-        ''' one-step forward model
-        Parameters
-        ----------
-        tgt_emb_in : Tensor, shape (batch_size, input_dim)
-        Returns
-        -------
-        Tensor, shape (batch_size, hidden_dim)
-        '''
-        bsize = tgt_emb_in.size(0)
-        tgt_emb_out, self.hidden_dict['h'] \
-            = self.model(tgt_emb_in.view(bsize, 1, -1),
-                         self.hidden_dict['h'])
-        return tgt_emb_out
-class NodeMLP(nn.Module):
-    def __init__(self, input_size, output_size, hidden_size):
-        super().__init__()
-        self.lin1 = nn.Linear(input_size, hidden_size)
-        self.nbat = nn.BatchNorm1d(hidden_size)
-        self.lin2 = nn.Linear(hidden_size, output_size)
-    def forward(self, x):
-        x = self.lin1(x)
-        x = self.nbat(x)
-        x = x.relu()
-        x = self.lin2(x)
-        return x
-class GINLayer(MessagePassing):
-    def __init__(self, node_input_size, node_output_size, node_hidden_size, edge_input_size):
-        super().__init__()
-        self.node_mlp = NodeMLP(node_input_size, node_output_size, node_hidden_size)
-        self.edge_mlp = FeatureEmbedding(edge_input_size, node_output_size)
-        self.eps = nn.Parameter(torch.tensor([0.0]))
-    def forward(self, x, edge_index, edge_attr):
-        msg = self.propagate(edge_index, x=x ,edge_attr=edge_attr)
-        x = (1.0 + self.eps) * x + msg
-        x = x.relu()
-        x = self.node_mlp(x)
-        return x
-    def message(self, x_j, edge_attr):
-        edge_attr = self.edge_mlp(edge_attr)
-        x_j = x_j + edge_attr
-        x_j = x_j.relu()
-        return x_j
-    def update(self, aggr_out):
-        return aggr_out
-#TODO implement the case where features of atoms and edges are considered
-# Check GraphMVP and ogb (open graph benchmark) to realize this
-class GIN(torch.nn.Module):
-    def __init__(self, node_feature_size, edge_feature_size, hidden_channels=64,
-                 proximity_size=3, dropout=0.1):
-        super().__init__()
-        #print("(num node features, num edge features)=", (node_feature_size, edge_feature_size))
-        hsize = hidden_channels * 2
-        atom_dim, edge_dim = get_atom_edge_feature_dims()
-        self.trans = FeatureEmbedding(atom_dim, hidden_channels)
-        ml = []
-        for _ in range(proximity_size):
-            ml.append(GINLayer(hidden_channels, hidden_channels, hsize, edge_dim))
-        self.mlist = nn.ModuleList(ml)
-        #It is possible to calculate relu with x.relu() where x is an output
-        #self.activations = nn.ModuleList(actl)
-        self.dropout = dropout
-        self.proximity_size = proximity_size
-    def forward(self, x, edge_index, edge_attr, batch_size):
-        x = x.to(torch.float)
-        #print("before: edge_weight.shape=", edge_attr.shape)
-        edge_attr = edge_attr.to(torch.float)
-        #print("after: edge_weight.shape=", edge_attr.shape)
-        x = self.trans(x)
-        # TODO Check if this x is consistent with global_add_pool
-        hlist = [global_add_pool(x, batch_size)]
-        for id, m in enumerate(self.mlist):
-            x = m(x, edge_index=edge_index, edge_attr=edge_attr)
-            #print("Done with one layer")
-            ###if id != self.proximity_size - 1:
-            x = x.relu()
-            x = F.dropout(x, p=self.dropout, training=self.training)
-            #h = global_mean_pool(x, batch_size)
-            h = global_add_pool(x, batch_size)
-            hlist.append(h)
-            #print("Done with one relu call: x.shape=", x.shape)
-        #print("calling golbal mean pool")
-        #print("calling dropout x.shape=", x.shape)
-        #print("x=", x)
-        #print("hlist[0].shape=", hlist[0].shape)
-        x = torch.cat(hlist, dim=1)
-        #print("x.shape=", x.shape)
-        x = F.dropout(x, p=self.dropout, training=self.training)
-        return x
-# TODO copied from MHG implementation and adapted here.
-class GrammarSeq2SeqVAE(nn.Module):
-    '''
-    Variational seq2seq with grammar.
-    TODO: rewrite this class using mixin
-    '''
-    def __init__(self, hrg, rank=-1, latent_dim=64, max_len=80,
-                 batch_size=64, padding_idx=-1,
-                 encoder_params={'hidden_dim': 384, 'num_layers': 3, 'bidirectional': True,
-                                 'dropout': 0.1},
-                 decoder_params={'hidden_dim': 384, #'num_layers': 2,
-                                 'num_layers': 3,
-                                 'dropout': 0.1},
-                 prod_rule_embed_params={'out_dim': 128},
-                 no_dropout=False):
-        super().__init__()
-        # TODO USE GRU FOR ENCODING AND DECODING
-        self.hrg = hrg
-        self.rank = rank
-        self.prod_rule_corpus = hrg.prod_rule_corpus
-        self.prod_rule_embed_params = prod_rule_embed_params
-        self.vocab_size = hrg.num_prod_rule + 1
-        self.batch_size = batch_size
-        self.padding_idx = np.mod(padding_idx, self.vocab_size)
-        self.no_dropout = no_dropout
-        self.latent_dim = latent_dim
-        self.max_len = max_len
-        self.encoder_params = encoder_params
-        self.decoder_params = decoder_params
-        # TODO Simple embedding is used. Check if a domain-dependent embedding works or not.
-        embed_out_dim = self.prod_rule_embed_params['out_dim']
-        #use MolecularProdRuleEmbedding later on
-        self.src_embedding = nn.Embedding(self.vocab_size, embed_out_dim,
-                                          padding_idx=self.padding_idx)
-        self.tgt_embedding = nn.Embedding(self.vocab_size, embed_out_dim,
-                                          padding_idx=self.padding_idx)
-        # USE a GRU-based encoder in MHG
-        self.encoder = GRUEncoder(input_dim=embed_out_dim, batch_size=self.batch_size,
-                                      rank=self.rank, no_dropout=self.no_dropout,
-                                  **self.encoder_params)
-        lin_dim = (self.encoder_params.get('bidirectional', False) + 1) * self.encoder_params['hidden_dim']
-        lin_out_dim = self.latent_dim
-        self.hidden2mean = nn.Linear(lin_dim, lin_out_dim, bias=False)
-        self.hidden2logvar = nn.Linear(lin_dim, lin_out_dim)
-        # USE a GRU-based decoder in MHG
-        self.decoder = GRUDecoder(input_dim=embed_out_dim, batch_size=self.batch_size,
-                                  rank=self.rank, no_dropout=self.no_dropout, **self.decoder_params)
-        self.latent2tgt_emb = nn.Linear(self.latent_dim, embed_out_dim)
-        self.latent2hidden_dict = nn.ModuleDict()
-        dec_lin_out_dim = self.decoder_params['hidden_dim']
-        for each_hidden in self.decoder.hidden_dict.keys():
-            self.latent2hidden_dict[each_hidden] = nn.Linear(self.latent_dim, dec_lin_out_dim)
-            if self.rank >= 0:
-                if torch.cuda.is_available():
-                    self.latent2hidden_dict[each_hidden] = self.latent2hidden_dict[each_hidden].to(self.rank)
-                else:
-                    # support mac mps
-                    self.latent2hidden_dict[each_hidden] = self.latent2hidden_dict[each_hidden].to(torch.device("mps", self.rank))
-        self.dec2vocab = nn.Linear(dec_lin_out_dim, self.vocab_size)
-        self.encoder.init_hidden(self.batch_size)
-        self.decoder.init_hidden(self.batch_size)
-        # TODO Do we need this?
-        if hasattr(self.src_embedding, 'weight'):
-            self.src_embedding.weight.data.uniform_(-0.1, 0.1)
-        if hasattr(self.tgt_embedding, 'weight'):
-            self.tgt_embedding.weight.data.uniform_(-0.1, 0.1)
-        self.encoder.init_hidden(self.batch_size)
-        self.decoder.init_hidden(self.batch_size)
-    def to(self, device):
-        newself = super().to(device)
-        newself.src_embedding = newself.src_embedding.to(device)
-        newself.tgt_embedding = newself.tgt_embedding.to(device)
-        newself.encoder = newself.encoder.to(device)
-        newself.decoder = newself.decoder.to(device)
-        newself.dec2vocab = newself.dec2vocab.to(device)
-        newself.hidden2mean = newself.hidden2mean.to(device)
-        newself.hidden2logvar = newself.hidden2logvar.to(device)
-        newself.latent2tgt_emb = newself.latent2tgt_emb.to(device)
-        newself.latent2hidden_dict = newself.latent2hidden_dict.to(device)
-        return newself
-    def forward(self, in_seq, out_seq):
-        ''' forward model
-        Parameters
-        ----------
-        in_seq : Variable, shape (batch_size, length)
-            each element corresponds to word index.
-            where the index should be less than `vocab_size`
-        Returns
-        -------
-        Variable, shape (batch_size, length, vocab_size)
-            logit of each word (applying softmax yields the probability)
-        '''
-        mu, logvar = self.encode(in_seq)
-        z = self.reparameterize(mu, logvar)
-        return self.decode(z, out_seq), mu, logvar
-    def encode(self, in_seq):
-        src_emb = self.src_embedding(in_seq)
-        src_h = self.encoder.forward(src_emb)
-        if self.encoder_params.get('bidirectional', False):
-            concat_src_h = torch.cat((src_h[:, -1, 0, :], src_h[:, 0, 1, :]), dim=1)
-            return self.hidden2mean(concat_src_h), self.hidden2logvar(concat_src_h)
-        else:
-            return self.hidden2mean(src_h[:, -1, :]), self.hidden2logvar(src_h[:, -1, :])
-    def reparameterize(self, mu, logvar, training=True):
-        if training:
-            std = logvar.mul(0.5).exp_()
-            device = next(self.parameters()).device
-            eps = Variable(std.data.new(std.size()).normal_())
-            if device != eps.get_device():
-                eps.to(device)
-            return eps.mul(std).add_(mu)
-        else:
-            return mu
-    #TODO Not tested. Need to implement this in case of molecular structure generation
-    def sample(self, sample_size=-1, deterministic=True, return_z=False):
-        self.eval()
-        self.init_hidden()
-        if sample_size == -1:
-            sample_size = self.batch_size
-        num_iter = int(np.ceil(sample_size / self.batch_size))
-        hg_list = []
-        z_list = []
-        for _ in range(num_iter):
-            z = Variable(torch.normal(
-                torch.zeros(self.batch_size, self.latent_dim),
-                torch.ones(self.batch_size * self.latent_dim))).cuda()
-            _, each_hg_list = self.decode(z, deterministic=deterministic)
-            z_list.append(z)
-            hg_list += each_hg_list
-        z = torch.cat(z_list)[:sample_size]
-        hg_list = hg_list[:sample_size]
-        if return_z:
-            return hg_list, z.cpu().detach().numpy()
-        else:
-            return hg_list
-    def decode(self, z=None, out_seq=None, deterministic=True):
-        if z is None:
-            z = Variable(torch.normal(
-                torch.zeros(self.batch_size, self.latent_dim),
-                torch.ones(self.batch_size * self.latent_dim)))
-        if self.rank >= 0:
-            z = z.to(next(self.parameters()).device)
-        hidden_dict_0 = {}
-        for each_hidden in self.latent2hidden_dict.keys():
-            hidden_dict_0[each_hidden] = self.latent2hidden_dict[each_hidden](z)
-        bsize = z.size(0)
-        self.decoder.init_hidden(bsize)
-        self.decoder.feed_hidden(hidden_dict_0)
-        if out_seq is not None:
-            tgt_emb0 = self.latent2tgt_emb(z)
-            tgt_emb0 = tgt_emb0.view(tgt_emb0.shape[0], 1, tgt_emb0.shape[1])
-            out_seq_emb = self.tgt_embedding(out_seq)
-            tgt_emb = torch.cat((tgt_emb0, out_seq_emb), dim=1)[:, :-1, :]
-            tgt_emb_pred_list = []
-            for each_idx in range(self.max_len):
-                tgt_emb_pred = self.decoder.forward_one_step(tgt_emb[:, each_idx, :].view(bsize, 1, -1))
-                tgt_emb_pred_list.append(tgt_emb_pred)
-            vocab_logit = self.dec2vocab(torch.cat(tgt_emb_pred_list, dim=1))
-            return vocab_logit
-        else:
-            with torch.no_grad():
-                tgt_emb = self.latent2tgt_emb(z)
-                tgt_emb = tgt_emb.view(tgt_emb.shape[0], 1, tgt_emb.shape[1])
-                tgt_emb_pred_list = []
-                stack_list = []
-                hg_list = []
-                nt_symbol_list = []
-                nt_edge_list = []
-                gen_finish_list = []
-                for _ in range(bsize):
-                    stack_list.append([])
-                    hg_list.append(None)
-                    nt_symbol_list.append(NTSymbol(degree=0,
-                                                   is_aromatic=False,
-                                                   bond_symbol_list=[]))
-                    nt_edge_list.append(None)
-                    gen_finish_list.append(False)
-                for idx in range(self.max_len):
-                    tgt_emb_pred = self.decoder.forward_one_step(tgt_emb)
-                    tgt_emb_pred_list.append(tgt_emb_pred)
-                    vocab_logit = self.dec2vocab(tgt_emb_pred)
-                    for each_batch_idx in range(bsize):
-                        if not gen_finish_list[each_batch_idx]: # if generation has not finished
-                            # get production rule greedily
-                            prod_rule = self.hrg.prod_rule_corpus.sample(vocab_logit[each_batch_idx, :, :-1].squeeze().cpu().numpy(),
-                                                                         nt_symbol_list[each_batch_idx],
-                                                                         deterministic=deterministic)
-                            # convert production rule into an index
-                            tgt_id = self.hrg.prod_rule_list.index(prod_rule)
-                            # apply the production rule
-                            hg_list[each_batch_idx], nt_edges = prod_rule.applied_to(hg_list[each_batch_idx], nt_edge_list[each_batch_idx])
-                            # add non-terminals to the stack
-                            stack_list[each_batch_idx].extend(nt_edges[::-1])
-                            # if the stack size is 0, generation has finished!
-                            if len(stack_list[each_batch_idx]) == 0:
-                                gen_finish_list[each_batch_idx] = True
-                            else:
-                                nt_edge_list[each_batch_idx] = stack_list[each_batch_idx].pop()
-                                nt_symbol_list[each_batch_idx] = hg_list[each_batch_idx].edge_attr(nt_edge_list[each_batch_idx])['symbol']
-                        else:
-                            tgt_id = np.mod(self.padding_idx, self.vocab_size)
-                        indice_tensor = torch.LongTensor([tgt_id])
-                        device = next(self.parameters()).device
-                        if indice_tensor.device != device:
-                            indice_tensor = indice_tensor.to(device)
-                        tgt_emb[each_batch_idx, :] = self.tgt_embedding(indice_tensor)
-                vocab_logit = self.dec2vocab(torch.cat(tgt_emb_pred_list, dim=1))
-                #for id, v in enumerate(gen_finish_list):
-                #if not v:
-                #    print("bacth id={} not finished generating a sequence: ".format(id))
-                return gen_finish_list, vocab_logit, hg_list
-# TODO A lot of duplicates with GrammarVAE. Clean up it if necessary
-class GrammarGINVAE(nn.Module):
-    '''
-    Variational autoencoder based on GIN and grammar
-    '''
-    def __init__(self, hrg, rank=-1, max_len=80,
-                 batch_size=64, padding_idx=-1,
-                 encoder_params={'node_feature_size': 4, 'edge_feature_size': 3,
-                                 'hidden_channels': 64, 'proximity_size': 3,
-                                 'dropout': 0.1},
-                 decoder_params={'hidden_dim': 384, 'num_layers': 3,
-                                 'dropout': 0.1},
-                 prod_rule_embed_params={'out_dim': 128},
-                 no_dropout=False):
-        super().__init__()
-        # TODO USE GRU FOR ENCODING AND DECODING
-        self.hrg = hrg
-        self.rank = rank
-        self.prod_rule_corpus = hrg.prod_rule_corpus
-        self.prod_rule_embed_params = prod_rule_embed_params
-        self.vocab_size = hrg.num_prod_rule + 1
-        self.batch_size = batch_size
-        self.padding_idx = np.mod(padding_idx, self.vocab_size)
-        self.no_dropout = no_dropout
-        self.max_len = max_len
-        self.encoder_params = encoder_params
-        self.decoder_params = decoder_params
-        # TODO Simple embedding is used. Check if a domain-dependent embedding works or not.
-        embed_out_dim = self.prod_rule_embed_params['out_dim']
-        #use MolecularProdRuleEmbedding later on
-        self.tgt_embedding = nn.Embedding(self.vocab_size, embed_out_dim,
-                                          padding_idx=self.padding_idx)
-        self.encoder = GIN(**self.encoder_params)
-        self.latent_dim = self.encoder_params['hidden_channels']
-        self.proximity_size = self.encoder_params['proximity_size']
-        hidden_dim = self.decoder_params['hidden_dim']
-        self.hidden2mean = nn.Linear(self.latent_dim * (1 + self.proximity_size), self.latent_dim, bias=False)
-        self.hidden2logvar = nn.Linear(self.latent_dim * (1 + self.proximity_size), self.latent_dim)
-        self.decoder = GRUDecoder(input_dim=embed_out_dim, batch_size=self.batch_size,
-                                  rank=self.rank, no_dropout=self.no_dropout, **self.decoder_params)
-        self.latent2tgt_emb = nn.Linear(self.latent_dim, embed_out_dim)
-        self.latent2hidden_dict = nn.ModuleDict()
-        for each_hidden in self.decoder.hidden_dict.keys():
-            self.latent2hidden_dict[each_hidden] = nn.Linear(self.latent_dim, hidden_dim)
-            if self.rank >= 0:
-                if torch.cuda.is_available():
-                    self.latent2hidden_dict[each_hidden] = self.latent2hidden_dict[each_hidden].to(self.rank)
-                else:
-                    # support mac mps
-                    self.latent2hidden_dict[each_hidden] = self.latent2hidden_dict[each_hidden].to(torch.device("mps", self.rank))
-        self.dec2vocab = nn.Linear(hidden_dim, self.vocab_size)
-        self.decoder.init_hidden(self.batch_size)
-        # TODO Do we need this?
-        if hasattr(self.tgt_embedding, 'weight'):
-            self.tgt_embedding.weight.data.uniform_(-0.1, 0.1)
-        self.decoder.init_hidden(self.batch_size)
-    def to(self, device):
-        newself = super().to(device)
-        newself.encoder = newself.encoder.to(device)
-        newself.decoder = newself.decoder.to(device)
-        newself.rank = next(newself.encoder.parameters()).get_device()
-        return newself
-    def forward(self, x, edge_index, edge_attr, batch_size, out_seq=None, sched_prob = None):
-        mu, logvar = self.encode(x, edge_index, edge_attr, batch_size)
-        z = self.reparameterize(mu, logvar)
-        return self.decode(z, out_seq, sched_prob=sched_prob), mu, logvar
-    #TODO Not tested. Need to implement this in case of molecular structure generation
-    def sample(self, sample_size=-1, deterministic=True, return_z=False):
-        self.eval()
-        self.init_hidden()
-        if sample_size == -1:
-            sample_size = self.batch_size
-        num_iter = int(np.ceil(sample_size / self.batch_size))
-        hg_list = []
-        z_list = []
-        for _ in range(num_iter):
-            z = Variable(torch.normal(
-                torch.zeros(self.batch_size, self.latent_dim),
-                torch.ones(self.batch_size * self.latent_dim))).cuda()
-            _, each_hg_list = self.decode(z, deterministic=deterministic)
-            z_list.append(z)
-            hg_list += each_hg_list
-        z = torch.cat(z_list)[:sample_size]
-        hg_list = hg_list[:sample_size]
-        if return_z:
-            return hg_list, z.cpu().detach().numpy()
-        else:
-            return hg_list
-    def decode(self, z=None, out_seq=None, deterministic=True, sched_prob=None):
-        if z is None:
-            z = Variable(torch.normal(
-                torch.zeros(self.batch_size, self.latent_dim),
-                torch.ones(self.batch_size * self.latent_dim)))
-        if self.rank >= 0:
-            z = z.to(next(self.parameters()).device)
-        hidden_dict_0 = {}
-        for each_hidden in self.latent2hidden_dict.keys():
-            hidden_dict_0[each_hidden] = self.latent2hidden_dict[each_hidden](z)
-        bsize = z.size(0)
-        self.decoder.init_hidden(bsize)
-        self.decoder.feed_hidden(hidden_dict_0)
-        if out_seq is not None:
-            tgt_emb0 = self.latent2tgt_emb(z)
-            tgt_emb0 = tgt_emb0.view(tgt_emb0.shape[0], 1, tgt_emb0.shape[1])
-            out_seq_emb = self.tgt_embedding(out_seq)
-            tgt_emb = torch.cat((tgt_emb0, out_seq_emb), dim=1)[:, :-1, :]
-            tgt_emb_pred_list = []
-            tgt_emb_pred = None
-            for each_idx in range(self.max_len):
-                if tgt_emb_pred is None or sched_prob is None or torch.rand(1)[0] <= sched_prob:
-                    inp = tgt_emb[:, each_idx, :].view(bsize, 1, -1)
-                else:
-                    cur_logit = self.dec2vocab(tgt_emb_pred)
-                    yi = torch.argmax(cur_logit, dim=2)
-                    inp = self.tgt_embedding(yi)
-                tgt_emb_pred = self.decoder.forward_one_step(inp)
-                tgt_emb_pred_list.append(tgt_emb_pred)
-            vocab_logit = self.dec2vocab(torch.cat(tgt_emb_pred_list, dim=1))
-            return vocab_logit
-        else:
-            with torch.no_grad():
-                tgt_emb = self.latent2tgt_emb(z)
-                tgt_emb = tgt_emb.view(tgt_emb.shape[0], 1, tgt_emb.shape[1])
-                tgt_emb_pred_list = []
-                stack_list = []
-                hg_list = []
-                nt_symbol_list = []
-                nt_edge_list = []
-                gen_finish_list = []
-                for _ in range(bsize):
-                    stack_list.append([])
-                    hg_list.append(None)
-                    nt_symbol_list.append(NTSymbol(degree=0,
-                                                   is_aromatic=False,
-                                                   bond_symbol_list=[]))
-                    nt_edge_list.append(None)
-                    gen_finish_list.append(False)
-                for _ in range(self.max_len):
-                    tgt_emb_pred = self.decoder.forward_one_step(tgt_emb)
-                    tgt_emb_pred_list.append(tgt_emb_pred)
-                    vocab_logit = self.dec2vocab(tgt_emb_pred)
-                    for each_batch_idx in range(bsize):
-                        if not gen_finish_list[each_batch_idx]: # if generation has not finished
-                            # get production rule greedily
-                            prod_rule = self.hrg.prod_rule_corpus.sample(vocab_logit[each_batch_idx, :, :-1].squeeze().cpu().numpy(),
-                                                                         nt_symbol_list[each_batch_idx],
-                                                                         deterministic=deterministic)
-                            # convert production rule into an index
-                            tgt_id = self.hrg.prod_rule_list.index(prod_rule)
-                            # apply the production rule
-                            hg_list[each_batch_idx], nt_edges = prod_rule.applied_to(hg_list[each_batch_idx], nt_edge_list[each_batch_idx])
-                            # add non-terminals to the stack
-                            stack_list[each_batch_idx].extend(nt_edges[::-1])
-                            # if the stack size is 0, generation has finished!
-                            if len(stack_list[each_batch_idx]) == 0:
-                                gen_finish_list[each_batch_idx] = True
-                            else:
-                                nt_edge_list[each_batch_idx] = stack_list[each_batch_idx].pop()
-                                nt_symbol_list[each_batch_idx] = hg_list[each_batch_idx].edge_attr(nt_edge_list[each_batch_idx])['symbol']
-                        else:
-                            tgt_id = np.mod(self.padding_idx, self.vocab_size)
-                        indice_tensor = torch.LongTensor([tgt_id])
-                        if self.rank >= 0:
-                            indice_tensor = indice_tensor.to(next(self.parameters()).device)
-                        tgt_emb[each_batch_idx, :] = self.tgt_embedding(indice_tensor)
-                vocab_logit = self.dec2vocab(torch.cat(tgt_emb_pred_list, dim=1))
-                return gen_finish_list, vocab_logit, hg_list
-    #TODO Not tested. Need to implement this in case of molecular structure generation
-    def conditional_distribution(self, z, tgt_id_list):
-        self.eval()
-        self.init_hidden()
-        z = z.cuda()
-        hidden_dict_0 = {}
-        for each_hidden in self.latent2hidden_dict.keys():
-            hidden_dict_0[each_hidden] = self.latent2hidden_dict[each_hidden](z)
-        self.decoder.feed_hidden(hidden_dict_0)
-        with torch.no_grad():
-            tgt_emb = self.latent2tgt_emb(z)
-            tgt_emb = tgt_emb.view(tgt_emb.shape[0], 1, tgt_emb.shape[1])
-            nt_symbol_list = []
-            stack_list = []
-            hg_list = []
-            nt_edge_list = []
-            gen_finish_list = []
-            for _ in range(self.batch_size):
-                nt_symbol_list.append(NTSymbol(degree=0,
-                                               is_aromatic=False,
-                                               bond_symbol_list=[]))
-                stack_list.append([])
-                hg_list.append(None)
-                nt_edge_list.append(None)
-                gen_finish_list.append(False)
-            for each_position in range(len(tgt_id_list[0])):
-                tgt_emb_pred = self.decoder.forward_one_step(tgt_emb)
-                for each_batch_idx in range(self.batch_size):
-                    if not gen_finish_list[each_batch_idx]: # if generation has not finished
-                        # use the prespecified target ids
-                        tgt_id = tgt_id_list[each_batch_idx][each_position]
-                        prod_rule = self.hrg.prod_rule_list[tgt_id]
-                        # apply the production rule
-                        hg_list[each_batch_idx], nt_edges = prod_rule.applied_to(hg_list[each_batch_idx], nt_edge_list[each_batch_idx])
-                        # add non-terminals to the stack
-                        stack_list[each_batch_idx].extend(nt_edges[::-1])
-                        # if the stack size is 0, generation has finished!
-                        if len(stack_list[each_batch_idx]) == 0:
-                            gen_finish_list[each_batch_idx] = True
-                        else:
-                            nt_edge_list[each_batch_idx] = stack_list[each_batch_idx].pop()
-                            nt_symbol_list[each_batch_idx] = hg_list[each_batch_idx].edge_attr(nt_edge_list[each_batch_idx])['symbol']
-                    else:
-                        tgt_id = np.mod(self.padding_idx, self.vocab_size)
-                    indice_tensor = torch.LongTensor([tgt_id])
-                    indice_tensor = indice_tensor.cuda()
-                    tgt_emb[each_batch_idx, :] = self.tgt_embedding(indice_tensor)
-            # last one step
-            conditional_logprob_list = []
-            tgt_emb_pred = self.decoder.forward_one_step(tgt_emb)
-            vocab_logit = self.dec2vocab(tgt_emb_pred)
-            for each_batch_idx in range(self.batch_size):
-                if not gen_finish_list[each_batch_idx]: # if generation has not finished
-                    # get production rule greedily
-                    masked_logprob = self.hrg.prod_rule_corpus.masked_logprob(
-                        vocab_logit[each_batch_idx, :, :-1].squeeze().cpu().numpy(),
-                        nt_symbol_list[each_batch_idx])
-                    conditional_logprob_list.append(masked_logprob)
-                else:
-                    conditional_logprob_list.append(None)
-        return conditional_logprob_list
-    #TODO Not tested. Need to implement this in case of molecular structure generation
-    def decode_with_beam_search(self, z, beam_width=1):
-        ''' Decode a latent vector using beam search.
-        Parameters
-        ----------
-        z
-            latent vector
-        beam_width : int
-            parameter for beam search
-        Returns
-        -------
-        List of Hypergraphs
-        '''
-        if self.batch_size != 1:
-            raise ValueError('this method works only under batch_size=1')
-        if self.padding_idx != -1:
-            raise ValueError('this method works only under padding_idx=-1')
-        top_k_tgt_id_list = [[]] * beam_width
-        logprob_list = [0.] * beam_width
-        for each_len in range(self.max_len):
-            expanded_logprob_list = np.repeat(logprob_list, self.vocab_size) # including padding_idx
-            expanded_length_list = np.array([0] * (beam_width * self.vocab_size))
-            for each_beam_idx, each_candidate in enumerate(top_k_tgt_id_list):
-                conditional_logprob = self.conditional_distribution(z, [each_candidate])[0]
-                if conditional_logprob is None:
-                    expanded_logprob_list[(each_beam_idx + 1) * self.vocab_size - 1]\
-                        = logprob_list[each_beam_idx]
-                    expanded_logprob_list[each_beam_idx * self.vocab_size : (each_beam_idx + 1) * self.vocab_size - 1]\
-                        = -np.inf
-                    expanded_length_list[each_beam_idx * self.vocab_size : (each_beam_idx + 1) * self.vocab_size]\
-                        = len(each_candidate)
-                else:
-                    expanded_logprob_list[each_beam_idx * self.vocab_size : (each_beam_idx + 1) * self.vocab_size - 1]\
-                        = logprob_list[each_beam_idx] + conditional_logprob
-                    expanded_logprob_list[(each_beam_idx + 1) * self.vocab_size - 1]\
-                        = -np.inf
-                    expanded_length_list[each_beam_idx * self.vocab_size : (each_beam_idx + 1) * self.vocab_size]\
-                        = len(each_candidate) + 1
-            score_list = np.array(expanded_logprob_list) / np.array(expanded_length_list)
-            if each_len == 0:
-                top_k_list = np.argsort(score_list[:self.vocab_size])[::-1][:beam_width]
-            else:
-                top_k_list = np.argsort(score_list)[::-1][:beam_width]
-            next_top_k_tgt_id_list = []
-            next_logprob_list = []
-            for each_top_k in top_k_list:
-                beam_idx = each_top_k // self.vocab_size
-                vocab_idx = each_top_k % self.vocab_size
-                if vocab_idx == self.vocab_size - 1:
-                    next_top_k_tgt_id_list.append(top_k_tgt_id_list[beam_idx])
-                    next_logprob_list.append(expanded_logprob_list[each_top_k])
-                else:
-                    next_top_k_tgt_id_list.append(top_k_tgt_id_list[beam_idx] + [vocab_idx])
-                    next_logprob_list.append(expanded_logprob_list[each_top_k])
-            top_k_tgt_id_list = next_top_k_tgt_id_list
-            logprob_list = next_logprob_list
-        # construct hypergraphs
-        hg_list = []
-        for each_tgt_id_list in top_k_tgt_id_list:
-            hg = None
-            stack = []
-            nt_edge = None
-            for each_idx, each_prod_rule_id in enumerate(each_tgt_id_list):
-                prod_rule = self.hrg.prod_rule_list[each_prod_rule_id]
-                hg, nt_edges = prod_rule.applied_to(hg, nt_edge)
-                stack.extend(nt_edges[::-1])
-                try:
-                    nt_edge = stack.pop()
-                except IndexError:
-                    if each_idx == len(each_tgt_id_list) - 1:
-                        break
-                    else:
-                        raise ValueError('some bugs')
-            hg_list.append(hg)
-        return hg_list
-    def graph_embed(self, x, edge_index, edge_attr, batch_size):
-        src_h = self.encoder.forward(x, edge_index, edge_attr, batch_size)
-        return src_h
-    def encode(self, x, edge_index, edge_attr, batch_size):
-        #print("device for src_emb=", src_emb.get_device())
-        #print("device for self.encoder=", next(self.encoder.parameters()).get_device())
-        src_h = self.graph_embed(x, edge_index, edge_attr, batch_size)
-        mu, lv = self.get_mean_var(src_h)
-        return mu, lv
-    def get_mean_var(self, src_h):
-        #src_h = torch.tanh(src_h)
-        mu = self.hidden2mean(src_h)
-        lv = self.hidden2logvar(src_h)
-        mu = torch.tanh(mu)
-        lv = torch.tanh(lv)
-        return mu, lv
-    def reparameterize(self, mu, logvar, training=True):
-        if training:
-            std = logvar.mul(0.5).exp_()
-            eps = Variable(std.data.new(std.size()).normal_())
-            if self.rank >= 0:
-                eps = eps.to(next(self.parameters()).device)
-            return eps.mul(std).add_(mu)
-        else:
-            return mu
-# Copied from the MHG implementation and adapted
-class GrammarVAELoss(_Loss):
-    '''
-    a loss function for Grammar VAE
-    Attributes
-    ----------
-    hrg : HyperedgeReplacementGrammar
-    beta : float
-        coefficient of KL divergence
-    '''
-    def __init__(self, rank, hrg, beta=1.0, **kwargs):
-        super().__init__(**kwargs)
-        self.hrg = hrg
-        self.beta = beta
-        self.rank = rank
-    def forward(self, mu, logvar, in_seq_pred, in_seq):
-        ''' compute VAE loss
-        Parameters
-        ----------
-        in_seq_pred : torch.Tensor, shape (batch_size, max_len, vocab_size)
-            logit
-        in_seq : torch.Tensor, shape (batch_size, max_len)
-            each element corresponds to a word id in vocabulary.
-        mu : torch.Tensor, shape (batch_size, hidden_dim)
-        logvar : torch.Tensor, shape (batch_size, hidden_dim)
-            mean and log variance of the normal distribution
-        '''
-        batch_size = in_seq_pred.shape[0]
-        max_len = in_seq_pred.shape[1]
-        vocab_size = in_seq_pred.shape[2]
-        mask = torch.zeros(in_seq_pred.shape)
-        for each_batch in range(batch_size):
-            flag = True
-            for each_idx in range(max_len):
-                prod_rule_idx = in_seq[each_batch, each_idx]
-                if prod_rule_idx == vocab_size - 1:
-                    #### DETERMINE WHETHER THIS SHOULD BE SKIPPED OR NOT
-                    mask[each_batch, each_idx, prod_rule_idx] = 1
-                    #break
-                    continue
-                lhs = self.hrg.prod_rule_corpus.prod_rule_list[prod_rule_idx].lhs_nt_symbol
-                lhs_idx = self.hrg.prod_rule_corpus.nt_symbol_list.index(lhs)
-                mask[each_batch, each_idx, :-1] = torch.FloatTensor(self.hrg.prod_rule_corpus.lhs_in_prod_rule[lhs_idx])
-            if self.rank >= 0:
-                mask = mask.to(next(self.parameters()).device)
-        in_seq_pred = mask * in_seq_pred
-        cross_entropy = F.cross_entropy(
-            in_seq_pred.view(-1, vocab_size),
-            in_seq.view(-1),
-            reduction='sum',
-            #ignore_index=self.ignore_index if self.ignore_index is not None else -100
-            )
-        kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
-        return cross_entropy + self.beta * kl_div
-class VAELoss(_Loss):
-    def __init__(self, beta=0.01):
-        super().__init__()
-        self.beta = beta
-    def forward(self, mean, log_var, dec_outputs, targets):
-        device = mean.get_device()
-        if device >= 0:
-            targets = targets.to(mean.get_device())
-        reconstruction = F.cross_entropy(dec_outputs.view(-1, dec_outputs.size(2)), targets.view(-1), reduction='sum')
-        KL = 0.5 * torch.sum(1 + log_var - mean ** 2 - torch.exp(log_var))
-        loss = - self.beta * KL + reconstruction
-        return loss

notebooks/mhg-gnn_encoder_decoder_example.ipynb DELETED Viewed

@@ -1,114 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "829ddc03",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import sys\n",
-    "sys.path.append('..')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ea820e23",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import load"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b9a51fa8",
-   "metadata": {},
-   "source": [
-    "# Load MHG-GNN"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c6ea1fc8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model_ckp = \"models/model_checkpoints/mhg_model/pickles/mhggnn_pretrained_model_radius7_1116_2023.pickle\"\n",
-    "\n",
-    "model = load.load(model_name = model_ckp)\n",
-    "if model is None:\n",
-    "    print(\"Model not loaded, please check you have MHG pickle file\")\n",
-    "else:\n",
-    "    print(\"MHG model loaded\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b4a0b557",
-   "metadata": {},
-   "source": [
-    "# Embeddings\n",
-    "\n",
-    "※ replace the smiles exaple list with your dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c63a6be6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with torch.no_grad():\n",
-    "    repr = model.encode([\"CCO\", \"O=C=O\", \"OC(=O)c1ccccc1C(=O)O\"])\n",
-    "    \n",
-    "# Print the latent vectors\n",
-    "print(repr)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "a59f9442",
-   "metadata": {},
-   "source": [
-    "# Decoding"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6a0d8a41",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "orig = model.decode(repr)\n",
-    "print(orig)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

paper/MHG-GNN_Combination of Molecular Hypergraph Grammar with Graph Neural Network.pdf DELETED Viewed

Binary file (343 kB)

pickles/.DS_Store DELETED Viewed

Binary file (6.15 kB)