How to tell which of two glob patterns is more general - algorithm

Let's say we have two glob patterns, for example app/src/**/* and app/src/some-dir/**/*.
Both patterns match some path, for example app/src/some-dir/some-other-dir/my-file.txt.
I need to tell which of these two (or more) patterns is more general — in other words, which pattern matches a subset of what another pattern matches. In the example above the more general pattern is app/src/**/*, because it matches everything in app/src, while the second pattern matches only what lies in a subdirectory of app/src.
The first thought is to tell, which path prefix is longer (part of pattern before any special symbols like *), but I believe that's not a solution to a problem, since pattern can be more complicated and can include special characters in different places, something like app/*/some-dir/some-other-dir.
Is there any reliable solution for this problem, that does not require actual globbing and counting matched files (since it may be a slow operation) ?

The textbook method for this is to convert each glob to a deterministic finite automaton, then construct the product of the automata (implicitly here), and search for a state that is accepting in one but not the other. Depending on how many subset checks you're planning to do and how big the state machines get, it may be worth adding a DFA minimization stage (also covered in your favorite automata theory textbook).
Some rough Python 3:
import re
def glob_tokens_from_glob(g):
    """Split the glob string ``g`` into a list of tokens.

    Two consecutive stars form a single ``"**"`` token, a lone star forms
    ``"*"``, and every other character becomes a one-character token.
    """
    return [match.group(0) for match in re.finditer(r"\*\*?|[^*]", g)]
def is_wild(token):
    """Return True when ``token`` is ``"?"`` or begins with a star."""
    return token == "?" or token[:1] == "*"
def epsilon_successors(tokens, i):
    """Yield ``i`` followed by every position reached by skipping star tokens.

    A star token may match nothing, so from position ``i`` the automaton can
    also be at each position just past a consecutive run of star tokens.
    """
    position = i
    yield position
    while position < len(tokens):
        if not tokens[position].startswith("*"):
            break
        position += 1
        yield position
def successors(tokens, i, sym):
    """Yield the positions reachable from position ``i`` by consuming ``sym``.

    ``sym`` is either a concrete character or one of the symbol-class names
    used by the caller (``"/"`` or ``"[^/]"``). Nothing is yielded when ``i``
    is past the last token or the token at ``i`` cannot consume ``sym``.
    """
    if i >= len(tokens):
        return
    token = tokens[i]
    is_separator = sym == "/"
    if token == "**":
        # '**' consumes anything, including '/', and stays in place.
        yield i
    elif token in ("*", "?"):
        # Neither '*' nor '?' may consume a path separator.
        if not is_separator:
            yield i if token == "*" else i + 1
    elif token == sym:
        yield i + 1
def successor_dict(tokens, q):
    """Build the transition row of the DFA state ``q`` (a set of positions).

    The row maps each relevant symbol to the frozenset of positions reached
    after consuming it. Relevant symbols are the literal tokens found at the
    positions in ``q`` plus the two classes ``"/"`` and ``"[^/]"``.
    """
    symbols = {"/", "[^/]"}
    for i in q:
        if i < len(tokens) and not is_wild(tokens[i]):
            symbols.add(tokens[i])

    row = {}
    for sym in symbols:
        targets = set()
        for i in q:
            for j in successors(tokens, i, sym):
                targets.update(epsilon_successors(tokens, j))
        row[sym] = frozenset(targets)
    return row
def dfa_from_glob_tokens(tokens):
    """Run the subset construction for a tokenized glob.

    Returns a tuple ``(start_state, delta, accepting_states)`` in which every
    state is a frozenset of token positions, ``delta`` maps a state to its
    transition row, and a state is accepting when it contains
    ``len(tokens)``.
    """
    start = frozenset(epsilon_successors(tokens, 0))
    dead = frozenset()
    # The dead state is pre-seeded and only loops back to itself.
    delta = {dead: {"[^/]": dead}}
    pending = [start]
    while pending:
        state = pending.pop()
        if state not in delta:
            row = successor_dict(tokens, state)
            delta[state] = row
            pending.extend(row.values())
    accepting = {state for state in delta if len(tokens) in state}
    return (start, delta, accepting)
def dfa_from_glob(g):
    """Tokenize the glob string ``g`` and build its DFA."""
    tokens = glob_tokens_from_glob(g)
    return dfa_from_glob_tokens(tokens)
def successor(d, sym):
    """Look up the target state for ``sym`` in the transition row ``d``.

    A symbol without an entry of its own falls back to the empty (dead)
    state when it is ``"/"`` and to the row's ``"[^/]"`` entry otherwise.
    """
    if sym == "/":
        return d.get(sym, frozenset())
    return d[sym] if sym in d else d["[^/]"]
def dfa_matches_subset(dfa_a, dfa_b):
    """Return True when every string accepted by ``dfa_a`` is accepted by ``dfa_b``.

    Walks the product automaton from the pair of start states and fails as
    soon as it reaches a pair that is accepting in ``dfa_a`` but not in
    ``dfa_b``.
    """
    start_a, rows_a, accept_a = dfa_a
    start_b, rows_b, accept_b = dfa_b
    seen = set()
    pending = [(start_a, start_b)]
    while pending:
        pair = pending.pop()
        if pair in seen:
            continue
        seen.add(pair)
        state_a, state_b = pair
        if state_a in accept_a and state_b not in accept_b:
            return False
        row_a = rows_a[state_a]
        row_b = rows_b[state_b]
        # Follow every symbol that either automaton distinguishes here.
        for sym in row_a.keys() | row_b.keys():
            pending.append((successor(row_a, sym), successor(row_b, sym)))
    return True
def test():
    """Print the subset matrix of the sample patterns.

    Row i, column j shows 1 when ``dfa_matches_subset`` reports that
    pattern i is a subset of pattern j, and 0 otherwise.
    """
    patterns = [
        "app/src/**/*",
        "app/src/some-dir/**/*",
        "app/src/some-dir/some-other-dir/my-file.txt",
        "app/*/some-dir/some-other-dir/*",
        "*",
        "/",
        "?",
        "b",
        "cc",
    ]
    dfas = [dfa_from_glob(pattern) for pattern in patterns]
    for a in dfas:
        for b in dfas:
            print(int(dfa_matches_subset(a, b)), end=" ")
        print()


test()

Related

LeetCode 1707. Maximum XOR With an Element From Array

You are given an array nums consisting of non-negative integers. You are also given a queries array, where queries[i] = [xi, mi].
The answer to the ith query is the maximum bitwise XOR value of xi and any element of nums that does not exceed mi. In other words, the answer is max(nums[j] XOR xi) for all j such that nums[j] <= mi. If all elements in nums are larger than mi, then the answer is -1.
Return an integer array answer where answer.length == queries.length and answer[i] is the answer to the ith query.
This python solution uses Trie, but still LeetCode shows TLE?
import operator
class TrieNode:
    """Node of a binary trie: ``left`` is the 0-bit child, ``right`` the 1-bit child."""

    def __init__(self):
        self.left = None
        self.right = None


class Solution:
    """Offline solution to "Maximum XOR With an Element From Array"."""

    def insert(self, head, x):
        """Insert the 32-bit value ``x`` into the trie rooted at ``head``, MSB first."""
        curr = head
        for i in range(31, -1, -1):
            if (x >> i) & 1:
                if not curr.right:
                    curr.right = TrieNode()
                curr = curr.right
            else:
                if not curr.left:
                    curr.left = TrieNode()
                curr = curr.left

    def maximizeXor(self, nums: List[int], queries: List[List[int]]) -> List[int]:
        """Answer every ``[xi, mi]`` query with ``max(n ^ xi for n in nums if n <= mi)``.

        Returns a list aligned with ``queries``; an entry is -1 when no
        element of ``nums`` is ``<= mi``. Neither ``nums`` nor ``queries`` is
        modified.
        """
        res = [-1] * len(queries)
        ordered = sorted(nums)
        # Visit the queries by increasing limit, so the trie only ever grows
        # and each number is inserted exactly once over the whole run.
        by_limit = sorted(enumerate(queries), key=lambda item: item[1][1])
        head = TrieNode()
        pos = 0  # number of values of `ordered` already in the trie
        for index, query in by_limit:
            xi, mi = query[0], query[1]
            while pos < len(ordered) and ordered[pos] <= mi:
                self.insert(head, ordered[pos])
                pos += 1
            if pos == 0:
                # Nothing is <= mi (this also covers an empty nums).
                continue
            node = head
            best = 0
            for i in range(31, -1, -1):
                if (xi >> i) & 1:
                    preferred, fallback = node.left, node.right
                else:
                    preferred, fallback = node.right, node.left
                # Take the branch with the opposite bit whenever it exists:
                # that sets bit i of the XOR.
                if preferred:
                    best |= 1 << i
                    node = preferred
                else:
                    node = fallback
            res[index] = best
        return res
Here is an alternative Trie implementation to solve this problem:
[Notes: 1) max(x XOR y for y in A); 2) do the greedy on MSB bit; 3) sort the queries]
class Trie:
    """Binary trie over 32-bit integers stored as nested dicts keyed by bit."""

    def __init__(self):
        self.root = {}

    def add(self, n):
        """Insert ``n``, most significant bit first."""
        node = self.root
        for shift in reversed(range(32)):
            node = node.setdefault((n >> shift) & 1, {})

    def query(self, n):
        """Return the largest ``n ^ y`` over the stored ``y``, or -1 if empty."""
        node = self.root
        if not node:
            return -1
        best = 0
        for shift in reversed(range(32)):
            wanted = ((n >> shift) & 1) ^ 1
            if wanted in node:
                # The opposite bit is available: this XOR bit becomes 1.
                best |= 1 << shift
                node = node[wanted]
            else:
                node = node[wanted ^ 1]
        return best
class Solution:
    """Offline solver: sort the queries by limit and grow one shared trie."""

    def maximizeXor(self, nums: List[int], queries: List[List[int]]) -> List[int]:
        """Return, for each ``[x, m]``, the best XOR of ``x`` with a value ``<= m`` (-1 if none)."""
        nums.sort()
        answers = [-1] * len(queries)
        trie = Trie()
        total = len(nums)
        inserted = 0
        by_limit = sorted(enumerate(queries), key=lambda item: item[1][1])
        for slot, (x, m) in by_limit:
            # Admit every number that the current limit allows.
            while inserted < total and nums[inserted] <= m:
                trie.add(nums[inserted])
                inserted += 1
            answers[slot] = trie.query(x)
        return answers
The problem is that you're building a fresh Trie for each query. And to make matters worse, use linear search to find the maximum value <= mi in nums. You'd be better off by simply using
# Largest n ^ xi over the admissible n (n <= mi); -1 when no such n exists.
max((n ^ xi for n in nums if n <= mi), default=-1)
The solution here would be to build the trie right at the start and simply filter for values smaller than mi using that trie:
import math
import bisect
def dump(t, indent=''):
    """Debug helper: print the subtree rooted at ``t``, one node per line."""
    if t is not None:
        print(indent, "bit=", t.bit, "val=", t.val, "lower=", t.lower)
        dump(t.left, indent + '\tl')
        dump(t.right, indent + '\tr')


class Trie:
    """Node of a binary trie built over a sorted list of numbers.

    An inner node splits its values on ``bit`` (``left``: bit clear,
    ``right``: bit set) and has ``val`` set to None; a leaf carries its
    number in ``val``. ``lower`` is the value the node was created with as
    its lower bound.
    """

    def __init__(self, bit, val, lower):
        self.bit = bit
        self.val = val
        self.lower = lower
        self.left = None
        self.right = None

    def solve(self, mi, xi):
        """Return the stored value ``v <= mi`` that maximizes ``v ^ xi``, or -1.

        ``mi == ~0`` is a sentinel meaning "no upper limit". Debug output is
        printed on every call.
        """
        print('-------------------------------------------')
        print(self.bit, "mi(b)=", (mi >> self.bit) & 1, "xi(b)=", (xi >> self.bit) & 1, "mi=", mi, "xi=", xi)
        dump(self)
        unbounded = mi == ~0
        if self.val is not None:
            # reached a leaf of the trie => candidate value
            print("Leaf")
            # A leaf may sit above bit 0, so its low bits were never compared
            # with mi; verify the limit explicitly before accepting it.
            return self.val if unbounded or self.val <= mi else -1
        if mi & (1 << self.bit) == 0:
            # the maximum has a zero-bit at this position => all values in the right subtree are > mi
            print("Left forced by max")
            return -1 if self.left is None else self.left.solve(mi, xi)
        # The right subtree holds an admissible value exactly when its
        # smallest element does not exceed mi (equality included).
        right_ok = self.right is not None and (unbounded or mi >= self.right.lower)
        wants_right = (xi >> self.bit) & 1 == 0
        # pick based on xor-value if possible
        if wants_right and right_ok:
            print("Right preferred by xi")
            return self.right.solve(mi, xi)
        elif not wants_right and self.left is not None:
            print("Left preferred by xi")
            # mi has this bit set, so everything on the left is below mi.
            return self.left.solve(~0, xi)
        # pick whichever is available
        if right_ok:
            print("Only right available")
            return self.right.solve(mi, xi)
        elif self.left is not None:
            print("Only left available")
            return self.left.solve(~0, xi)
        else:
            print("None available")
            return -1
def build_trie(nums):
    """Sort ``nums`` in place and build a ``Trie`` over it.

    Returns the root node, or None when ``nums`` is empty. Equal values
    collapse into a single leaf. Debug output is printed while building.
    """
    nums.sort()
    if not nums:
        return None
    # msb of max(nums); exact integer arithmetic, and well-defined for 0
    max_bit = max(nums[-1].bit_length() - 1, 0)
    print(max_bit)

    def node(start, end, bit, template):
        print(start, end, bit, template, nums[start:end])
        if start == end:
            # a partition without values => no Trie-node
            return None
        if nums[start] == nums[end - 1]:
            # One distinct value left (possibly repeated): reached a leaf.
            # Without this, duplicates would recurse down to a negative bit.
            return Trie(0, nums[start], nums[start])
        # find pivot for partitioning based on bit-value of specified position (bit)
        part = bisect.bisect_left(nums, template | (1 << bit), start, end)
        print(part)
        # build nodes for partitioning
        res = Trie(bit, None, nums[start])
        res.left = node(start, part, bit - 1, template)
        res.right = node(part, end, bit - 1, template | (1 << bit))
        return res

    return node(0, len(nums), max_bit, 0)
class Solution:
    """Online solver: build the trie once, then answer queries in any order."""

    def maximizeXor(self, nums: List[int], queries: List[List[int]]) -> List[int]:
        """Return ``max(n ^ xi for n in nums if n <= mi)`` per query, -1 if none."""
        if not nums:
            return [-1] * len(queries)
        trie = build_trie(nums)  # sorts nums in place
        largest = nums[-1]
        answers = []
        for xi, mi in queries:
            # ~0 tells solve() that the limit cannot exclude anything.
            best = trie.solve(mi if mi < largest else ~0, xi)
            # solve() yields the chosen element; the answer is its XOR with xi.
            answers.append(-1 if best == -1 else best ^ xi)
        return answers
I've been a bit lazy and simply used ~0 to signify that the maximum can be ignored since all values in the subtree are smaller than mi. The basic idea is that ~0 & x == x is true for any integer x. Not quite as simple as #DanielHao's answer, but capable of handling streams of queries.

Dynamic Programming for shortest subsequence that is not a subsequence of two strings

Problem: Given two sequences s1 and s2 of '0' and '1', return the shortest sequence that is a subsequence of neither of the two sequences.
E.g. s1 = '011' s2 = '1101' Return s_out = '00' as one possible result.
Note that substring and subsequence are different where substring the characters are contiguous but in a subsequence that needs not be the case.
My question: How is dynamic programming applied in the "Solution Provided" below and what is its time complexity?
My attempt involves computing all the subsequences of each string, giving sub1 and sub2. Append a '1' or a '0' to each element of sub1 and determine whether that new subsequence is absent from sub2. Find the minimum-length one. Here is my code:
My Solution
def get_subsequences(seq, index, subs, result):
    """Add every non-empty subsequence of ``seq[index:]``, prefixed by ``subs``, to ``result``.

    ``subs`` is the list of characters chosen so far; ``result`` is a set
    that receives the joined strings.
    """
    if index != len(seq):
        following = index + 1
        # Branch once without the current character and once with it.
        get_subsequences(seq, following, subs, result)
        get_subsequences(seq, following, subs + [seq[index]], result)
        return
    if subs:
        result.add(''.join(subs))
def get_bad_subseq(subseq):
    """Return a shortest ``sub + bit`` (``sub`` in ``subseq``) that is not in ``subseq``.

    Only a strictly shorter candidate replaces the current one, so the first
    candidate found at the minimal length wins. Returns ``''`` when no
    candidate exists.
    """
    shortest = ''
    shortest_len = float('inf')
    for prefix in subseq:
        candidate_len = len(prefix) + 1
        if candidate_len >= shortest_len:
            continue
        for bit in ('0', '1'):
            extended = prefix + bit
            if extended not in subseq:
                shortest_len = candidate_len
                shortest = extended
                break
    return shortest
Solution Provided (not mine)
How does it work and its time complexity?
The solution below looks similar to: http://kyopro.hateblo.jp/entry/2018/12/11/100507
def set_nxt(s, nxt):
    """Fill ``nxt[i][c]`` for ``i`` in ``0..len(s)`` and ``c`` in ``{0, 1}``.

    Positions are 1-based: ``nxt[i][c]`` is the smallest position ``> i``
    whose character is ``'0'`` (``c == 0``) or anything else (``c == 1``),
    and ``len(s) + 1`` when there is none. ``nxt`` is modified in place.
    """
    n = len(s)
    nearest = [n + 1, n + 1]
    for i in reversed(range(1, n + 1)):
        row = nxt[i]
        row[0], row[1] = nearest
        nearest[0 if s[i - 1] == '0' else 1] = i
    nxt[0][0], nxt[0][1] = nearest
def get_shortest(seq1, seq2):
    """Return a shortest string of '0'/'1' that is a subsequence of neither input.

    ``dp[i][j]`` is the length of the shortest string that cannot be matched
    starting after position ``i`` of ``seq1`` and after position ``j`` of
    ``seq2`` (1-based; index ``len + 1`` means "already fell off the end").
    """
    len_seq1 = len(seq1)
    len_seq2 = len(seq2)
    nxt_seq1 = [[len_seq1 + 1 for _ in range(2)] for _ in range(len_seq1 + 2)]
    nxt_seq2 = [[len_seq2 + 1 for _ in range(2)] for _ in range(len_seq2 + 2)]
    set_nxt(seq1, nxt_seq1)
    set_nxt(seq2, nxt_seq2)
    # Strictly larger than any reachable answer (at most max(len) + 1), even
    # for two empty inputs, where 2 * max(len) would be 0 and hang the
    # reconstruction loop below.
    INF = len_seq1 + len_seq2 + 2
    dp = [[INF for _ in range(len_seq2 + 2)] for _ in range(len_seq1 + 2)]
    dp[len_seq1 + 1][len_seq2 + 1] = 0
    for i in range(len_seq1 + 1, -1, -1):
        for j in range(len_seq2 + 1, -1, -1):
            for k in range(2):
                after = dp[nxt_seq1[i][k]][nxt_seq2[j][k]]
                if after < INF:
                    dp[i][j] = min(dp[i][j], after + 1)
    # Walk the table from the start, taking the first bit that stays optimal.
    res = []
    i = 0
    j = 0
    while i <= len_seq1 or j <= len_seq2:
        for k in range(2):
            if dp[i][j] == dp[nxt_seq1[i][k]][nxt_seq2[j][k]] + 1:
                i = nxt_seq1[i][k]
                j = nxt_seq2[j][k]
                res.append(str(k))
                break
    return "".join(res)
I am not going to work it through in detail, but the idea of this solution is to create a 2-D array of every combinations of positions in the one array and the other. It then populates this array with information about the shortest sequences that it finds that force you that far.
Just constructing that array takes space (and therefore time) O(len(seq1) * len(seq2)). Filling it in takes a similar time.
This is done with lots of bit twiddling that I don't want to track.
I have another approach that is clearer to me that usually takes less space and less time, but in the worst case could be as bad. But I have not coded it up.
UPDATE:
Here it is, all coded up. With poor choices of variable names. Sorry about that.
# A trivial data class holding one node of a linked list of candidate
# subsequences, along with how far the candidate matches in the two sequences:
#   value - the character added by this node (None marks the empty start node)
#   pos1  - position in the first sequence just after the match so far
#   pos2  - position in the second sequence just after the match so far
#   tail  - the node this one extends (None for the start node)
import collections
SubSeqLinkedList = collections.namedtuple('SubSeqLinkedList', 'value pos1 pos2 tail')
# Returns the index just past the first occurrence of value at or after start.
# When there is no such occurrence the result lies past the end of seq.
def find_position_after_first_match (seq, start, value):
    for idx in range(start, len(seq)):
        if seq[idx] == value:
            return idx + 1
    return max(start, len(seq)) + 1
def make_longer_subsequence (subseq, value, seq1, seq2):
    """Return a new list node that extends ``subseq`` by ``value``.

    The node's ``pos1``/``pos2`` are the positions just after the first
    match of ``value`` in ``seq1``/``seq2`` from where ``subseq`` left off.
    """
    return SubSeqLinkedList(
        value=value,
        pos1=find_position_after_first_match(seq1, subseq.pos1, value),
        pos2=find_position_after_first_match(seq2, subseq.pos2, value),
        tail=subseq,
    )
def minimal_nonsubseq (seq1, seq2):
    """Search for a shortest '0'/'1' string that is a subsequence of neither input.

    Works breadth-first by length: each round extends every surviving
    candidate by '0' and by '1', keeps only candidates that are not matched
    at least as far by another one, and stops once the first candidate has
    run off the end of both sequences. The answer is read back through the
    ``tail`` links.
    """
    # We start with one candidate for how to start the subsequence
    # Namely an empty subsequence. Length 0, matches before the first character.
    candidates = [SubSeqLinkedList(value=None, pos1=0, pos2=0, tail=None)]
    # Now we try to replace candidates with longer maximal ones - nothing of
    # the same length is better at going farther in both sequences.
    # We keep this list ordered by descending how far it goes in sequence1.
    while candidates[0].pos1 <= len(seq1) or candidates[0].pos2 <= len(seq2):
        new_candidates = []
        for candidate in candidates:
            candidate1 = make_longer_subsequence(candidate, '0', seq1, seq2)
            candidate2 = make_longer_subsequence(candidate, '1', seq1, seq2)
            if candidate1.pos1 < candidate2.pos1:
                # swap them, so the one reaching farther in seq1 is handled first.
                candidate1, candidate2 = candidate2, candidate1
            for c in (candidate1, candidate2):
                if 0 == len(new_candidates):
                    new_candidates.append(c)
                elif new_candidates[-1].pos1 <= c.pos1 and new_candidates[-1].pos2 <= c.pos2:
                    # c reaches at least as far in both sequences: it replaces the last entry.
                    new_candidates[-1] = c
                elif new_candidates[-1].pos2 < c.pos2:
                    # Note, by construction we cannot be shorter in pos1.
                    # NOTE(review): this relies on the ordering described above - confirm.
                    new_candidates.append(c)
        # And now we throw away the ones we don't want.
        # Those that are on their way to a solution will be captured in the linked list.
        candidates = new_candidates
    answer = candidates[0]
    r_seq = [] # This winds up reversed.
    # Follow the tail links back to the start node, whose value is None.
    while answer.value is not None:
        r_seq.append(answer.value)
        answer = answer.tail
    return ''.join(reversed(r_seq))
# Example from the question.
print(minimal_nonsubseq('011', '1101'))

DNA subsequence dynamic programming question

I'm trying to solve DNA problem which is more of improved(?) version of LCS problem.
In the problem, there is string which is string and semi-substring which allows part of string to have one or no letter skipped. For example, for string "desktop", it has semi-substring {"destop", "dek", "stop", "skop","desk","top"}, all of which has one or no letter skipped.
Now, I am given two DNA strings consisting of {a,t,g,c}. I'm trying to find the longest common semi-substring (LSS), and if there is more than one LSS, print out the one in the fastest order.
For example, two dnas {attgcgtagcaatg, tctcaggtcgatagtgac} prints out "tctagcaatg"
and aaaattttcccc, cccgggggaatatca prints out "aattc"
I'm trying to use common LCS algorithm but cannot solve it with tables although I did solve the one with no letter skipped. Any advice?
This is a variation on the dynamic programming solution for LCS, written in Python.
First I'm building up a Suffix Tree for all the substrings that can be made from each string with the skip rule. Then I'm intersecting the suffix trees. Then I'm looking for the longest string that can be made from that intersection tree.
Please note that this is technically O(n^2). Its worst case is when both strings are the same character, repeated over and over again, because you wind up with a lot of what logically is something like, "an 'l' at position 42 in the one string could have matched against an 'l' at position 54 in the other". But in practice it will be O(n).
def find_subtree (text, max_skip=1):
    """Build a nested-dict tree of the semi-substrings of ``text``.

    The result maps each character of ``text`` to a dict of the characters
    that may follow it when up to ``max_skip`` characters are skipped between
    neighbours, recursively. Sub-dicts are shared between positions, so the
    structure is a DAG rather than a plain tree.
    """
    tree = {}
    tree_at_position = {}

    def subtree_from_position (position):
        if position not in tree_at_position:
            this_tree = {}
            if position < len(text):
                char = text[position]
                # Make sure that we've populated the further tree.
                subtree_from_position(position + 1)
                # If this char appeared later, include those possible matches.
                if char in tree:
                    # .items() works on Python 2 and 3; .iteritems() raises
                    # AttributeError on Python 3.
                    for char2, subtree in tree[char].items():
                        this_tree[char2] = subtree
                # And now update the new choices.
                for skip in range(max_skip + 1, 0, -1):
                    if position + skip < len(text):
                        this_tree[text[position + skip]] = subtree_from_position(position + skip)
                tree[char] = this_tree
            tree_at_position[position] = this_tree
        return tree_at_position[position]

    subtree_from_position(0)
    return tree
def find_longest_common_semistring (text1, text2):
    """Return a longest string that is a semi-substring of both texts.

    Builds the tree of each text with ``find_subtree``, intersects the two
    trees, then picks a longest path through the intersection. Both steps
    are memoized on the identity of the (shared) sub-dicts.
    """
    tree1 = find_subtree(text1)
    tree2 = find_subtree(text2)

    answered = {}

    def find_intersection (subtree1, subtree2):
        unique = (id(subtree1), id(subtree2))
        if unique not in answered:
            answer = {}
            # .items() instead of the Python-2-only .iteritems().
            for k, v in subtree1.items():
                if k in subtree2:
                    answer[k] = find_intersection(v, subtree2[k])
            answered[unique] = answer
        return answered[unique]

    found_longest = {}

    def find_longest (tree):
        if id(tree) not in found_longest:
            best_candidate = ''
            for char, subtree in tree.items():
                candidate = char + find_longest(subtree)
                if len(best_candidate) < len(candidate):
                    best_candidate = candidate
            found_longest[id(tree)] = best_candidate
        return found_longest[id(tree)]

    intersection_tree = find_intersection(tree1, tree2)
    return find_longest(intersection_tree)
print(find_longest_common_semistring("attgcgtagcaatg", "tctcaggtcgatagtgac"))
Let g(c, rs, rt) represent the longest common semi-substring of strings S and T ending at rs and rt, where rs and rt are the ranked occurrences of the character c in S and T, respectively, and K is the number of skips allowed. Then we can form a recursion, which we would be obliged to perform on all pairs of occurrences of c in S and T.
JavaScript code:
/**
 * Longest common semi-substring of S and T with up to K characters skipped
 * between consecutive matched characters.
 * Returns a two-element array [length, string].
 */
function f(S, T, K){
  // mapS maps a char to indexes of its occurrences in S
  // rsS maps the index in S to that char's rank (index) in mapS
  const [mapS, rsS] = mapString(S)
  const [mapT, rsT] = mapString(T)

  // h is used to memoize g
  // (the array key [c, rs, rt] is coerced to a string property name)
  const h = {}

  // g returns [length, string] for the best common semi-substring that ends
  // on the rs-th occurrence of c in S and the rt-th occurrence of c in T.
  function g(c, rs, rt){
    // NOTE(review): this branch returns a bare 0 while every other path
    // returns [length, string]; the visible callers only pass ranks >= 0.
    if (rs < 0 || rt < 0)
      return 0

    if (h.hasOwnProperty([c, rs, rt]))
      return h[[c, rs, rt]]

    // (We are guaranteed to be on
    // a match in this state.)
    let best = [1, c]

    let idxS = mapS[c][rs]
    let idxT = mapT[c][rt]

    // Nothing can precede index 0 in either string (result is not memoized).
    if (idxS == 0 || idxT == 0)
      return best

    // Try every pair of predecessors at most K + 1 positions back.
    for (let i=idxS-1; i>=Math.max(0, idxS - 1 - K); i--){
      for (let j=idxT-1; j>=Math.max(0, idxT - 1 - K); j--){
        if (S[i] == T[j]){
          const [len, str] = g(S[i], rsS[i], rsT[j])

          // ">=" lets a later pair of equal length replace an earlier one.
          if (len + 1 >= best[0])
            best = [len + 1, str + c]
        }
      }
    }

    return h[[c, rs, rt]] = best
  }

  let best = [0, '']

  // Evaluate g on every pair of occurrences of every character of S.
  for (let c of Object.keys(mapS)){
    for (let i=0; i<(mapS[c]||[]).length; i++){
      for (let j=0; j<(mapT[c]||[]).length; j++){
        let [len, str] = g(c, i, j)

        if (len > best[0])
          best = [len, str]
      }
    }
  }

  return best
}
/**
 * Index the characters of s.
 * Returns [map, rs]: map[ch] lists the indexes at which ch occurs in s, and
 * rs[i] is the position of index i within map[s[i]].
 */
function mapString(s){
  const map = {}
  const rs = []

  for (let i = 0; i < s.length; i++){
    const ch = s[i]

    if (map[ch]){
      map[ch].push(i)
    } else {
      map[ch] = [i]
    }
    rs.push(map[ch].length - 1)
  }

  return [map, rs]
}
// Example runs with K = 1; each prints the [length, string] pair returned by f.
console.log(f('attgcgtagcaatg', 'tctcaggtcgatagtgac', 1))
console.log(f('aaaattttcccc', 'cccgggggaatatca', 1))
console.log(f('abcade', 'axe', 1))

Recursive solution to common longest substring between two strings

I am trying to return the length of a common substring between two strings. I'm very well aware of the DP solution, however I want to be able to solve this recursively just for practice.
I have the solution to find the longest common subsequence...
def get_substring(str1, str2, i, j):
    """Return the length of the longest common subsequence of ``str1[:i]`` and ``str2[:j]``.

    Despite its name, this computes the longest common *subsequence*, as the
    surrounding text says.
    """
    if i == 0 or j == 0:
        # An empty prefix shares nothing; return 0 (not None) so "1 + ..." works.
        return 0
    if str1[i - 1] == str2[j - 1]:
        return 1 + get_substring(str1, str2, i - 1, j - 1)
    # Drop the last character of one prefix or of the other.
    return max(get_substring(str1, str2, i, j - 1),
               get_substring(str1, str2, i - 1, j))
However, I need the longest common substring, not the longest common sequence of letters. I tried altering my code in a couple of ways, one being changing the base case to...
if i == 0 or j == 0 or str1[i-1] != str2[j-1]:
return 0
But that did not work, and neither did any of my other attempts.
For example, for the following strings...
# Example input from the question; the longest common substring is "AGGT".
X = "AGGTAB"
Y = "BAGGTXAYB"
print(get_substring(X, Y, len(X), len(Y)))
The longest substring is AGGT.
My recursive skills are not the greatest, so if anybody can help me out that would be very helpful.
package algo.dynamic;
/** Recursive computation of the length of the longest common substring. */
public class LongestCommonSubstring {

    public static void main(String[] args) {
        final char[] first = "AGGTAB".toCharArray();
        final char[] second = "BAGGTXAYB".toCharArray();
        System.out.println(lcs(first, second, first.length, second.length, 0));
    }

    /**
     * @param a     first string
     * @param b     second string
     * @param i     length of the prefix of {@code a} under consideration
     * @param j     length of the prefix of {@code b} under consideration
     * @param count length of the matching run that ends just after these prefixes
     * @return the longest run found from this state
     */
    private static int lcs(char[] a, char[] b, int i, int j, int count) {
        if (i == 0 || j == 0) {
            return count;
        }
        int best = count;
        if (a[i - 1] == b[j - 1]) {
            // Extend the current run by one matching character.
            best = lcs(a, b, i - 1, j - 1, count + 1);
        }
        // Restart the run after dropping one character from either prefix.
        final int withoutLastOfB = lcs(a, b, i, j - 1, 0);
        final int withoutLastOfA = lcs(a, b, i - 1, j, 0);
        return Math.max(best, Math.max(withoutLastOfB, withoutLastOfA));
    }
}
You need to recurse on each separately. Which is easier to do if you have multiple recursive functions.
def longest_common_substr_at_both_start (str1, str2):
    """Return the longest common prefix of ``str1`` and ``str2``."""
    if str1 and str2 and str1[0] == str2[0]:
        rest = longest_common_substr_at_both_start(str1[1:], str2[1:])
        return str1[0] + rest
    return ''
def longest_common_substr_at_first_start (str1, str2):
    """Return the longest prefix of ``str1`` that occurs anywhere in ``str2``.

    On ties the match at the earliest position in ``str2`` is kept.
    """
    if not str2:
        return ''
    here = longest_common_substr_at_both_start (str1, str2)
    later = longest_common_substr_at_first_start (str1, str2[1:])
    return here if len(here) >= len(later) else later
def longest_common_substr (str1, str2):
    """Return the longest common substring of ``str1`` and ``str2``.

    Tries every start position in ``str1``; on ties the earliest start wins.
    """
    if not str1:
        return ''
    from_here = longest_common_substr_at_first_start (str1, str2)
    from_later = longest_common_substr(str1[1:], str2)
    return from_here if len(from_here) >= len(from_later) else from_later
print(longest_common_substr("BAGGTXAYB","AGGTAB") )
I am so sorry. I didn't have time to convert this into a recursive function. This was relatively straight forward to compose. If Python had a fold function a recursive function would be greatly eased. 90% of recursive functions are primitive. That's why fold is so valuable.
I hope the logic in this can help with a recursive version.
# Non-recursive sketch: list every substring of x of length >= 2 that also occurs in y.
(x,y)= "AGGTAB","BAGGTXAYB"
xrng= range(len(x)) # it is used twice
np=[(a+1,a+2) for a in xrng] # pairs (a, b) with b = a + 1; b is the substring length
allx = [ x[i:i+b] for (a,b) in np for i in xrng[:-a]] # every substring of x with length b >= 2
[ c for i in range(len(y)) for c in allx if c == y[i:i+len(c)]] # keep those found at some offset i of y
...producing this list from which to take the longest of the matches
['AG', 'AGG', 'AGGT', 'GG', 'GGT', 'GT']
I didn't realize getting the longest match from the list would be a little involved.
# Pick the first entry of ls that has the maximum length.
ls= ['AG', 'AGG', 'AGGT', 'GG', 'GGT', 'GT']
ml= max([len(x) for x in ls])  # length of the longest match
ls[[a for (a,b) in zip(range(len(ls)),[len(x) for x in ls]) if b == ml][0]]  # index of the first entry of that length
"AGGT"

Algorithms, DFS

I've written a program to find shortest path in a N*N grid recursively.
def dfs(x, y, Map, p):
    """Depth-first search for a path from ``(x, y)`` to a cell marked ``'E'``.

    ``Map`` is an N*N grid in which ``'P'`` cells are walkable and ``'E'`` is
    the goal. ``p`` holds the cells already on the path and is extended in
    place. Returns ``p`` (a list of ``[x, y]`` cells ending at ``'E'``) when
    the goal is reachable, otherwise ``[]``. The path is not necessarily the
    shortest one.
    """
    return _dfs(x, y, Map, p, {tuple(cell) for cell in p})


def _dfs(x, y, Map, p, seen):
    """Recursive worker for ``dfs``; ``seen`` holds every cell entered so far."""
    p.append([x, y])
    seen.add((x, y))
    if Map[x][y] == 'E':
        return p
    N = len(Map)
    for nx, ny in ((x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)):
        if N > nx >= 0 and N > ny >= 0:
            if Map[nx][ny] in ('P', 'E') and (nx, ny) not in seen:
                found = _dfs(nx, ny, Map, p, seen)
                # Pass a successful result up instead of discarding it.
                if found:
                    return found
    # Dead end: take this cell off the path. It stays in `seen`, so it is
    # never explored again.
    p.pop()
    return []
When Map[x][y] = 'E' the recursion doesn't stop and return p; instead it keeps going to the end. How can I correct it and return the path (p)?
By the looks of it, the code is prone to loop indefinitely. This is due to lack of checks whether you've entered a node before and moving in all (4) directions from a given node.
To solve it simply, add another array NxN of Boolean values answering the question: visited?. Then update the code to something along the lines:
def dfs(x, y, Map, visited, p):
    # Mark the cell on entry so that no later call descends into it again.
    # NOTE(review): visited[x, y] assumes tuple indexing (e.g. a dict or a
    # NumPy array); a list of lists needs visited[x][y] instead - confirm.
    visited[x, y] = True
    N = len(Map)
    (...)  # rest of the original body, elided by the author; the check below goes inside its neighbour loop
    if ((Map[i[0]][i[1]] == 'P' or Map[i[0]][i[1]] == 'E')
            and i not in p
            and not visited[i[0], i[1]]):
        dfs(i[0], i[1], Map, visited, p)

Resources