MATLAB code for topological analysis of text and grammar extraction; see "complexity" (http://spirospero.net/complexity.htm)

REMINDER: the input string P must start with 'START' and end with 'END'.

See http://spirospero.net/Salt2.pdf for use

===========================================================.

 

pg2.m

 

 

%pg2 : gives lists of words as NAME, and their LEFT and RIGHT neighbors;
%array sizes made repetitive for other purposes.
%
%Input (workspace):
%  P - text preloaded as a character array with ONE WORD PER ROW,
%      e.g. P=char('START','abc','bad','END')
%Output (workspace):
%  words - struct array, one element per distinct word (in order of first
%          appearance) with fields
%            name : the (blank padded) word
%            L, R : cell rows with the left / right neighbors, with repetitions
%  Z     - Z(w) is the number of occurrences of words(w).name in the text
%  W, wname, zname, lew and the remaining helper variables
%Calls the script pgwrk to display the neighbors.
%
%NOTE: the number of words is the number of ROWS of P. length() returns the
%largest dimension of an array, which for a short text made of long words
%is the word width, so size(...,1) is used throughout.
tic

zname = []; z = 1;
Z = [];                 %reset so that a previous run cannot leak into this one

words = []; W = P;
wname = P;              %wname contains all names with repetition
WW = [];                %comparison row; compare W=P and wname=P

sw = size(W); sww = sw(1,2);    %sww is the width of one padded word

twd = ' ';              %twd is word template, gives empty word
for k = 1:sww, twd(k) = char(' '); end
t = twd;                %t is an empty (all blank) word

words(1).name = char(t); w = 0; m = 0;   %empty initial array of names

lenW = size(W,1);       %number of words in text (rows, not length(W))

%Blank out every repeated word in wname and count the occurrences.
for i = 1:lenW
    m = 0; WW = W(i,:); z = 1;
    for j = (i+1):lenW              %only later rows can be repeats of row i
        WWW = W(j,:);               %WWW is a comparison row
        if isequal(WW, WWW)
            wname(j,:) = char(t); m = 1;    %inserts a gap into text
            z = z + 1;
        end
    end
    zname(i) = z;                   %occurrences of word i from row i onwards
end

%Collect the rows that were not blanked: these are the distinct words.
res = size(wname,1); w = 0;
for f = 1:res
    if ~isequal(wname(f,:), char(t))
        w = w + 1;
        words(w).name = wname(f,:); Z(w) = zname(f);
    end
end

lnW = size(W,1) - 1; lew = length(words); NL = 1; NR = 1;

%The first word has no left neighbor inside the text: mark it with '#'.
words(1).L(1) = {'#'};
words(1).R(1) = {words(2).name};

%Neighbors are collected for the inner positions 2..lenW-1 only, so that
%W(j-1,:) and W(j+1,:) always exist. A word that occurs only in the last
%row therefore keeps empty L and R lists.
for k = 1:lew
    NL = 1; NR = 1;
    for j = 2:lnW
        d = isequal(words(k).name, W(j,:));
        if d == 1
            words(k).L(NL) = {W(j-1,:)}; words(k).R(NR) = {W(j+1,:)};
            NL = NL + 1; NR = NR + 1;
        end
    end
end

%display NAME: occurrence count followed by the word
disp(blanks(2)'), disp ('WORDS-NAME'), disp(blanks(2)')
F = char('00');
for j = 1:lew
    F = num2str(Z(j)); FF = words(j).name;
    disp(F), disp(FF)
end
%disp(blanks(2)') %for r=1:lew,  disp (Z(r)),end

pgwrk %script to display L and R neighbors

toc; t=toc      %NOTE: t (the blank word template) now holds the elapsed time

 

 

 

 

pgwrk.m

 

%pgwrk displays LEFT and RIGHT neighbors; uses the script pigwork
%
%Input (workspace):
%  words  - struct array with fields name, L and R; L and R are cell rows
%           of neighbor words with repetitions
%Output (workspace):
%  wordsa - struct array; wordsa(m).L and wordsa(m).R hold the distinct
%           neighbors of word m as returned by pigwork (repeated neighbors
%           are prefixed with their count, e.g. '2-abc')
%For every word one line is displayed: its neighbors joined with ', '.
disp(blanks(2)'), disp ('WORDS-LEFT'), disp(blanks(2)')

f = char(words.name);
lw = length(words);     %number of words. length(f) would return the largest
                        %dimension of the char matrix f, i.e. the word width
                        %whenever the words are longer than the word list
wordsa = [];            %reset so that a previous run cannot leak into this one

for m = 1:lw
    A = words(m).L;     %list of LEFT neighbors with repetitions
    if ~isempty(A), A = deblank(A); end
    CS = A;
    %%%%%%%%%
    pigwork,            %counts repetitions; reads and rewrites CS
    %%%%%%%%%
    A = CS;
    wordsa(m).L = A;
end

%The last word is left out of the LEFT listing, as in the original layout.
for mm = 1:(lw-1)
    S = wordsa(mm).L;
    if ~isempty(S)      %a word without recorded neighbors prints nothing
        QL = size(S); sl = QL(2) - 1;
        SK = wordsa(mm).L(1);
        for k = 1:sl
            B = SK; D = wordsa(mm).L(k+1); SK = strcat(B, ',', D); B = SK;
        end
        CAS = SK; CAS = strrep(CAS, ',', ', '); DDD = char(CAS); disp(DDD),
    end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
disp(blanks(2)'), disp ('WORDS-RIGHT'), disp(blanks(2)')

for m = 1:lw
    A = words(m).R;     %list of RIGHT neighbors with repetitions
    if ~isempty(A), A = deblank(A); end
    CS = A;
    %%%%%%%%%
    pigwork,            %counts repetitions; reads and rewrites CS
    %%%%%
    A = CS; wordsa(m).R = A;
end

for mm = 1:lw
    S = wordsa(mm).R;
    if ~isempty(S)      %guards wordsa(mm).R(1) for words whose R list is empty
        QR = size(S); sr = QR(2) - 1; SK = wordsa(mm).R(1);
        for k = 1:sr
            B = SK; D = wordsa(mm).R(k+1);
            SK = strcat(B, ',', D); B = SK;
        end
        CAS = SK; CAS = strrep(CAS, ',', ', '); DDD = char(CAS);
        disp(DDD)
    end
end

 

 

 

 

 

pigwork.m

 

 

 

%pigwork: replaces the list CS by its distinct entries (as returned by
%unique) and prefixes every entry that occurs more than once in the
%original list with its count, e.g. '3-abc'.
%Input / output (workspace): CS
c = 0;
CSS = unique(CS);
lcs = length(CS);
lcss = length(CSS);

for s = 1:lcss                      %base word
    c = sum(strcmp(CSS(s), CS));    %occurrences of the base word in CS
    if c > 1                        %entries seen once stay unchanged
        cac = int2str(c);           %converting into string
        D = strcat(cac, '-', CSS(s));
        CSS(s) = D;
    end
end

CS = CSS;

 

 

 

 

 

 

 

 

 

 

 

ms.m

 

 

%mindset script: ms
%Builds the generator table G from the text P and displays sizes and timing.
%
%Input (workspace):
%  P - text as a character array with one word per row; it must start with
%      the word START and end with the word END
%Output (workspace):
%  UNAME - distinct words of P in order of first appearance
%  lg    - number of distinct words;  LP - number of words in P
%  G     - struct array, one element per distinct word, with fields
%            name     : the word (1x1 cell)
%            NQ, AQ   : number of occurrences / positions of the word in P
%            R, L     : text list of the distinct right / left neighbors,
%                       each prefixed with 'n-' when it occurs n>1 times
%            nr, nl   : occurrence counts of those neighbors
%            ReN, LeN : indices of those neighbors in UNAME (i.e. in G)
disp('REMINDER: start the input string with "START" and end with "END"');
tic; space=char(160);

%PART 1: NAME; unique list UNAME of G for G.name
A = cellstr(P); G = [];
LP = length(A);         %number of words = rows of P. length(P) would return
                        %the word width when words are longer than the text

%The neighbor lookup below indexes UNAME with the boundary words START and
%END; without them it fails with an index error, so report it clearly.
if ~any(strcmp('START', A)) || ~any(strcmp('END', A))
    error('ms: the input P must start with the word START and end with the word END.');
end

uA = unique(A); luA = length(uA);
zer = [];               %positions of every repeated (non-first) occurrence
for m = 1:luA
    count = 0;
    for n = 1:LP
        if isequal(uA(m), A(n))
            count = count + 1;
            if count > 1, zer = cat(2, zer, n); end
        end
    end
end

%AL(j) / AR(j): left / right neighbor of the word at position j
AL = cell(1, LP); AR = cell(1, LP);     %reset, no leftovers from earlier runs
AL(1)  = cellstr('START'); AL([2:LP])     = A([1:(LP-1)]);
AR(LP) = cellstr('END');   AR([1:(LP-1)]) = A([2:LP]);

UNAME = A; UNAME([zer]) = []; lg = length(UNAME);

%PART 2: L&R - positions in P of every distinct word
%(the former "all(1)=1" is dropped: it created a variable that shadows the
% builtin function all in the shared workspace)
NB = [];
for k = 1:lg
    a = UNAME(k); al = []; ar = [];     %first neighbors; correct ends later!
    for j = 1:LP                        %search for copies of the word
        b = A(j);
        if isequal(a, b) == 1
            al = cat(2, al, j); ar = cat(2, ar, j);
        end
    end
    NB(k).L = al; NB(k).R = ar;         %positions used for the L and R lookup
end

%PART 3: copy count NAME, L&R
for s = 1:lg
    G(s).name = UNAME(s);
    zz = strmatch((UNAME(s)), A, 'exact');
    G(s).NQ = length(zz); G(s).AQ = zz;

    %right neighbors
    X = AR([NB(s).R]); Y = unique(X); lub = length(Y);
    output = []; numr = []; RN = [];
    for qq = 1:lub
        mtch = strmatch(Y(qq), X, 'exact');
        lmtch = length(mtch);           %how often this neighbor follows the word
        w = Y(qq);

        if lmtch > 1
            output = strcat(output, num2str(lmtch), '-', w(1), ';', space);
        end
        if lmtch == 1
            output = strcat(output, w(1), ';', space);
        end

        numr = cat(2, numr, lmtch);
        wwr = cellstr(w(1));
        wu = strmatch(wwr, UNAME, 'exact'); RN = cat(2, RN, wu(1));
    end
    G(s).nr = numr;
    G(s).R = cellstr(output);
    G(s).ReN = RN;

    %left neighbors
    X = AL([NB(s).L]); Y = unique(X); lub = length(Y);
    output = []; numl = []; LN = [];
    for qq = 1:lub
        mtch = strmatch(Y(qq), X, 'exact');
        lmtch = length(mtch);           %how often this neighbor precedes the word
        w = Y(qq);

        if lmtch > 1
            output = strcat(output, num2str(lmtch), '-', w(1), ';', space);
        end
        if lmtch == 1
            output = strcat(output, w(1), ';', space);
        end

        numl = cat(2, numl, lmtch);
        wwl = cellstr(w(1));
        wu = strmatch(wwl, UNAME, 'exact'); LN = cat(2, LN, wu(1));
    end
    G(s).L = cellstr(output);
    G(s).nl = numl;
    G(s).LeN = LN;
end   % structure G.name/L/R

%correct the ends: first generator has only START on the left,
%last generator has only END on the right
G(1).L = cellstr('START'); G(lg).R = cellstr('END');

%%%disp('Type dsgw to display output for comprehensive 10 column table of G');
%%%disp ('Type dsgn to display output for narrow 5 column table of G');
t = toc;
tm = strcat('P = ', num2str(LP), ' G =  ', num2str(lg), '  time', ':', space, num2str(t));
disp(tm);

  

 

 

cblr

 

 

%cblr: lists BONDs and CATs (categories or classes) found in G
%Input (workspace): G, lg
%Output (workspace):
%  CB  - CB(i).bond  = [k n]: generator k and a right neighbor n (indices in
%        G) that follows it at least twice
%  CCR - CCR(i).cat  = k, CCR(i).catr = indices of the right neighbors of k,
%        for every generator with more than one distinct right neighbor
%  CCL - same for left neighbors (fields cat, catl)
%tic;
space = char(160);
CB = []; CCR = []; CCL = [];    %structures storing bonds and cats
g = 1; gc = 1; gl = 1; gcl = 1;

for k = 1:lg
    L = G(k).nl; M = G(k).NQ; R = G(k).nr;
    kgr = G(k).ReN; kgl = G(k).LeN;
    lr = length(R); ll = length(L);

    %BONDS
    for j = find(R >= 2 & R <= M)
        CB(g).bond = [k kgr(j)];
        g = g + 1;
    end

    %CATS (i.e., categories or classes)
    if lr > 1
        CCR(gc).cat = k;
        CCR(gc).catr = [kgr];
        gc = gc + 1;
    end
    if ll > 1
        CCL(gcl).cat = k;
        CCL(gcl).catl = [kgl];
        gcl = gcl + 1;
    end
end

%DISPLAY ADAPTED FOR TABLE FORMAT
% copy to document, find and replace ^p^p for ^p, convert to table,
% find and replace double spaces for single spaces, repeat until none found
lcb = length(CB); lccr = length(CCR); lccl = length(CCL);

disp(space)
disp( 'BONDs'),         %prepared for 2-column table
disp(space)
for c = 1:lcb
    disp (num2str(CB(c).bond))
    BB = strcat(G([CB(c).bond(1)]).name, '+', G([CB(c).bond(2)]).name);
    disp(char(BB))
end

disp(space)
disp('RIGHT CATs' ),    %prepared for 4-column table
disp(space)
for c = 1:lccr
    %generator first, then its right neighbors
    CTR = G(CCR(c).cat).name;
    disp(char(CTR)),
    DCTR = [];
    for CTR = CCR(c).catr
        DCTR = strcat(DCTR, G(CTR).name, ';', space);
    end
    disp(char(DCTR)),
end

disp(space)
disp('LEFT CATs' ),     %prepared for 4-column table
disp(space)
for c = 1:lccl
    %left neighbors first, then the generator
    CTL = G(CCL(c).cat).name;
    CCTL = char(CTL);
    DCTL = [];
    for CTL = CCL(c).catl
        DCTL = strcat(DCTL, G(CTL).name, ';', space);
    end
    disp(char(DCTL)), disp(CCTL)
end
disp(space)
%toc;t=toc;
%CB ,CCL,CCR,CTR, CTL

 

 

 

 

 

 

 

 

dsgn.m

 

 

%dsgn displays data for a narrow 5-column table; convert text to table with MS-Word
%Input (workspace): G, lg
%For every generator five lines are displayed: its number in G, its left
%neighbors, its name, its number of occurrences and its right neighbors.
for dp = 1:lg
    dcols = {num2str(dp), ...
             char(G(dp).L), ...
             char(G(dp).name), ...
             num2str(G(dp).NQ), ...
             char(G(dp).R)};
    for dc = 1:length(dcols)
        disp(dcols{dc})
    end
end

%script dsg : displays G for a comprehensive 10-column table

 

 

 

 

 

dsg.m

 

 

 

 

%script dsg : display data for a comprehensive table (redundant for common use)
%Input (workspace): G, lg
%For every generator ten items are displayed, in the order given by the
%COLUMN HEADS legend below.
%PART 4 : DISPLAY

disp('The output can be pasted into MS Word document and converted ')
disp(' into a comprehensive table of 10 columns ')
disp('ATTENTION: Remove empty lines before conversion')
disp( 'with: "find and replace ^p^p for ^p"')
%Legend entries 6 and 7 describe what is actually displayed: G.NQ (count)
%and G.AQ (positions of the generator in P).
disp('COLUMN HEADS: 1. Generator number in G; 2.left neighbors; 3. left neighbor positions in G;')
disp ('4. occurrences of left neighbors in input P; 5. generator name; 6. number of occurrences of generator in input P;')
disp ('7. generator positions in input P; 8. occurrences of right neighbors in input P;');
disp ('9. positions of right neighbors in G; 10. right neighbors;')

for dp = 1:lg
    %FOR COMPREHENSIVE TABLE, 10 column

    %1
    disp(dp)                    %NAME ordinal number
    %2
    disp(char(G(dp).L))
    %3
    disp(G(dp).LeN)
    %4
    disp(G(dp).nl)
    %5
    disp (char(G(dp).name))
    %6
    disp ( G(dp).NQ)
    %7
    disp ((G(dp).AQ'))
    %8
    disp(G(dp).nr)
    %9
    disp(G(dp).ReN)
    %10
    disp(char(G(dp).R))
end

%Column-wise alternatives (one field for all generators at a time):
% for dp=1:lg,  disp(char (G(dp).name)), end;
% for dp=1:lg,  disp(G(dp).nl), end;
% for dp=1:lg,  disp(char (G(dp).L)), end;
% for dp=1:lg,  disp(G(dp).LeN), end;
% for dp=1:lg,  disp(char (G(dp).R)), end;
% for dp=1:lg,  disp(G(dp).ReN), end;
% for dp=1:lg,  disp(G(dp).NQ), end;
% for dp=1:lg,  disp((G(dp).AQ)'), end;
%  [1:lg]'  %line numbers

 

email